summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Widenius <monty@askmonty.org>2013-03-26 00:03:13 +0200
committerMichael Widenius <monty@askmonty.org>2013-03-26 00:03:13 +0200
commit068c61978e3a81836d52b8caf11e044290159ad1 (patch)
tree2cbca861ab2cebe3bd99379ca9668bb483ca0d2a
parent35bc8f9f4353b64da215e52ff6f1612a8ce66f43 (diff)
downloadmariadb-git-068c61978e3a81836d52b8caf11e044290159ad1.tar.gz
Temporary commit of 10.0-merge
-rw-r--r--.bzrignore1
-rwxr-xr-xBUILD/SETUP.sh3
-rw-r--r--CMakeLists.txt9
-rw-r--r--client/CMakeLists.txt6
-rw-r--r--client/mysql.cc13
-rw-r--r--client/mysql_upgrade.c2
-rw-r--r--client/mysqladmin.cc3
-rw-r--r--client/mysqlbinlog.cc2
-rw-r--r--client/mysqlcheck.c4
-rw-r--r--client/mysqltest.cc4
-rw-r--r--cmake/configure.pl20
-rw-r--r--cmake/cpu_info.cmake30
-rw-r--r--cmake/libevent.cmake89
-rw-r--r--cmake/libutils.cmake8
-rw-r--r--cmake/ssl.cmake198
-rw-r--r--dbug/dbug.c4
-rw-r--r--extra/comp_err.c11
-rw-r--r--extra/my_print_defaults.c2
-rw-r--r--include/big_endian.h82
-rw-r--r--include/byte_order_generic.h95
-rw-r--r--include/byte_order_generic_x86.h97
-rw-r--r--include/byte_order_generic_x86_64.h83
-rw-r--r--include/crypt_genhash_impl.h32
-rw-r--r--include/errmsg.h12
-rw-r--r--include/ft_global.h21
-rw-r--r--include/little_endian.h75
-rw-r--r--include/m_ctype.h4
-rw-r--r--include/my_base.h58
-rw-r--r--include/my_byteorder.h54
-rw-r--r--include/my_default.h50
-rw-r--r--include/my_getopt.h5
-rw-r--r--include/my_global.h304
-rw-r--r--include/my_handler_errors.h4
-rw-r--r--include/my_md5.h94
-rw-r--r--include/my_rnd.h32
-rw-r--r--include/my_sys.h65
-rw-r--r--include/my_time.h4
-rw-r--r--include/mysql/client_authentication.h13
-rw-r--r--include/mysql/plugin.h12
-rw-r--r--include/mysql/plugin_audit.h.pp2
-rw-r--r--include/mysql/plugin_auth.h.pp2
-rw-r--r--include/mysql/plugin_ftparser.h.pp2
-rw-r--r--include/mysql/psi/mysql_file.h249
-rw-r--r--include/mysql/psi/mysql_idle.h4
-rw-r--r--include/mysql/psi/mysql_socket.h178
-rw-r--r--include/mysql/psi/mysql_stage.h4
-rw-r--r--include/mysql/psi/mysql_statement.h33
-rw-r--r--include/mysql/psi/mysql_table.h69
-rw-r--r--include/mysql/psi/mysql_thread.h80
-rw-r--r--include/mysql/psi/psi.h120
-rw-r--r--include/mysql/psi/psi_abi_v1.h.pp28
-rw-r--r--include/mysql/service_debug_sync.h11
-rw-r--r--include/mysql/service_my_plugin_log.h64
-rw-r--r--include/mysql_com.h44
-rw-r--r--include/password.h2
-rw-r--r--include/sha1.h81
-rw-r--r--include/thread_pool_priv.h1
-rw-r--r--libevent/CMakeLists.txt80
-rw-r--r--libevent/ChangeLog190
-rw-r--r--libevent/Doxyfile230
-rw-r--r--libevent/Makefile.am124
-rw-r--r--libevent/README57
-rw-r--r--libevent/WIN32-Code/event-config.h244
-rw-r--r--libevent/WIN32-Code/misc.c93
-rw-r--r--libevent/WIN32-Code/misc.h11
-rw-r--r--libevent/WIN32-Code/tree.h1354
-rw-r--r--libevent/WIN32-Code/win32.c486
-rw-r--r--libevent/WIN32-Prj/libevent.dsw74
-rw-r--r--libevent/WIN32-Prj/libevent.sln53
-rw-r--r--libevent/autogen.sh11
-rw-r--r--libevent/buffer.c451
-rw-r--r--libevent/cmake_install.cmake34
-rw-r--r--libevent/compat/sys/_time.h163
-rw-r--r--libevent/compat/sys/queue.h488
-rw-r--r--libevent/configure.in387
-rw-r--r--libevent/devpoll.c417
-rw-r--r--libevent/epoll.c373
-rw-r--r--libevent/epoll_sub.c52
-rw-r--r--libevent/evbuffer.c455
-rw-r--r--libevent/evdns.3322
-rw-r--r--libevent/evdns.c3200
-rw-r--r--libevent/evdns.h528
-rw-r--r--libevent/event-internal.h102
-rw-r--r--libevent/event.3624
-rw-r--r--libevent/event.c1025
-rw-r--r--libevent/event.h1175
-rw-r--r--libevent/event_rpcgen.py1417
-rw-r--r--libevent/event_tagging.c443
-rw-r--r--libevent/evhttp.h371
-rw-r--r--libevent/evport.c513
-rw-r--r--libevent/evrpc-internal.h87
-rw-r--r--libevent/evrpc.c661
-rw-r--r--libevent/evrpc.h486
-rw-r--r--libevent/evsignal.h54
-rw-r--r--libevent/evutil.c245
-rw-r--r--libevent/evutil.h185
-rw-r--r--libevent/http-internal.h154
-rw-r--r--libevent/http.c2830
-rw-r--r--libevent/kqueue.c449
-rw-r--r--libevent/log.c187
-rw-r--r--libevent/log.h51
-rw-r--r--libevent/min_heap.h150
-rw-r--r--libevent/poll.c379
-rw-r--r--libevent/sample/Makefile.am14
-rw-r--r--libevent/sample/event-test.c139
-rw-r--r--libevent/sample/signal-test.c63
-rw-r--r--libevent/sample/time-test.c70
-rw-r--r--libevent/select.c356
-rw-r--r--libevent/signal.c357
-rw-r--r--libevent/strlcpy-internal.h23
-rw-r--r--libevent/strlcpy.c76
-rw-r--r--libevent/test/Makefile.am35
-rw-r--r--libevent/test/bench.c188
-rw-r--r--libevent/test/regress.c1703
-rw-r--r--libevent/test/regress.gen.c872
-rw-r--r--libevent/test/regress.gen.h183
-rw-r--r--libevent/test/regress.h45
-rw-r--r--libevent/test/regress.rpc20
-rw-r--r--libevent/test/regress_dns.c376
-rw-r--r--libevent/test/regress_http.c1476
-rw-r--r--libevent/test/regress_rpc.c631
-rw-r--r--libevent/test/test-eof.c82
-rw-r--r--libevent/test/test-init.c33
-rw-r--r--libevent/test/test-time.c82
-rw-r--r--libevent/test/test-weof.c80
-rw-r--r--libevent/test/test.sh91
-rw-r--r--libmysql/CMakeLists.txt5
-rw-r--r--libmysql/errmsg.c2
-rw-r--r--libmysql/libmysql.c10
-rw-r--r--mysys/CMakeLists.txt7
-rw-r--r--mysys/array.c4
-rw-r--r--mysys/lf_alloc-pin.c2
-rw-r--r--mysys/lf_dynarray.c2
-rw-r--r--mysys/ma_dyncol.c19
-rw-r--r--mysys/mf_dirname.c2
-rw-r--r--mysys/mf_format.c2
-rw-r--r--mysys/mf_iocache.c8
-rw-r--r--mysys/my_aes.c227
-rw-r--r--mysys/my_alloc.c2
-rw-r--r--mysys/my_bitmap.c2
-rw-r--r--mysys/my_compare.c4
-rw-r--r--mysys/my_compress.c2
-rw-r--r--mysys/my_conio.c6
-rw-r--r--mysys/my_default.c (renamed from mysys/default.c)7
-rw-r--r--mysys/my_error.c176
-rw-r--r--mysys/my_file.c8
-rw-r--r--mysys/my_getopt.c1
-rw-r--r--mysys/my_rnd.c37
-rw-r--r--mysys/my_thr_init.c14
-rw-r--r--mysys/my_uuid.c3
-rw-r--r--mysys/psi_noop.c49
-rw-r--r--mysys/rijndael.c1379
-rw-r--r--mysys/sha1.c422
-rw-r--r--mysys/stacktrace.c4
-rw-r--r--mysys/string.c74
-rw-r--r--mysys/testhash.c4
-rw-r--r--mysys_ssl/CMakeLists.txt48
-rw-r--r--mysys_ssl/CTestTestfile.cmake6
-rw-r--r--mysys_ssl/cmake_install.cmake34
-rw-r--r--mysys_ssl/crypt_genhash_impl.cc454
-rw-r--r--mysys_ssl/my_aes.cc278
-rw-r--r--mysys_ssl/my_md5.cc68
-rw-r--r--mysys_ssl/my_rnd.cc103
-rw-r--r--mysys_ssl/my_sha1.cc141
-rw-r--r--mysys_ssl/my_sha2.cc68
-rw-r--r--sql-common/client.c2
-rw-r--r--sql-common/client_authentication.cc253
-rw-r--r--sql-common/my_time.c2
-rw-r--r--sql/CMakeLists.txt8
-rw-r--r--sql/debug_sync.cc16
-rw-r--r--sql/derror.cc18
-rw-r--r--sql/field.cc36
-rw-r--r--sql/field.h51
-rw-r--r--sql/filesort.cc8
-rw-r--r--sql/ha_ndbcluster.cc2
-rw-r--r--sql/handler.cc37
-rw-r--r--sql/handler.h439
-rw-r--r--sql/item.cc28
-rw-r--r--sql/item_buff.cc4
-rw-r--r--sql/item_cmpfunc.cc48
-rw-r--r--sql/item_create.cc2
-rw-r--r--sql/item_func.cc34
-rw-r--r--sql/item_func.h2
-rw-r--r--sql/item_strfunc.cc22
-rw-r--r--sql/item_strfunc.h2
-rw-r--r--sql/item_sum.cc14
-rw-r--r--sql/item_sum.h4
-rw-r--r--sql/item_timefunc.cc30
-rw-r--r--sql/item_timefunc.h4
-rw-r--r--sql/key.cc20
-rw-r--r--sql/log.cc8
-rw-r--r--sql/log_event.cc10
-rw-r--r--sql/log_event_old.cc2
-rw-r--r--sql/mdl.h2
-rw-r--r--sql/multi_range_read.cc4
-rw-r--r--sql/mysqld.cc8
-rw-r--r--sql/mysqld.h1
-rw-r--r--sql/net_serv.cc4
-rw-r--r--sql/opt_range.cc36
-rw-r--r--sql/opt_range.h4
-rw-r--r--sql/opt_range_mrr.cc2
-rw-r--r--sql/opt_subselect.cc6
-rw-r--r--sql/opt_table_elimination.cc4
-rw-r--r--sql/password.c115
-rw-r--r--sql/protocol.cc10
-rw-r--r--sql/rpl_mi.cc4
-rw-r--r--sql/rpl_record.cc2
-rw-r--r--sql/rpl_rli.cc4
-rw-r--r--sql/rpl_utility.cc2
-rw-r--r--sql/rpl_utility.h2
-rw-r--r--sql/share/errmsg-utf8.txt1221
-rw-r--r--sql/signal_handler.cc2
-rw-r--r--sql/slave.cc10
-rw-r--r--sql/sp_head.cc4
-rw-r--r--sql/spatial.h4
-rw-r--r--sql/sql_acl.cc2
-rw-r--r--sql/sql_alter.h395
-rw-r--r--sql/sql_analyse.cc14
-rw-r--r--sql/sql_cache.cc18
-rw-r--r--sql/sql_class.cc89
-rw-r--r--sql/sql_class.h36
-rw-r--r--sql/sql_client.cc2
-rw-r--r--sql/sql_connect.cc4
-rw-r--r--sql/sql_const.h11
-rw-r--r--sql/sql_error.cc2
-rw-r--r--sql/sql_join_cache.cc4
-rw-r--r--sql/sql_join_cache.h4
-rw-r--r--sql/sql_lex.h105
-rw-r--r--sql/sql_load.cc2
-rw-r--r--sql/sql_parse.cc4
-rw-r--r--sql/sql_partition.cc2
-rw-r--r--sql/sql_plugin.cc2
-rw-r--r--sql/sql_prepare.cc2
-rw-r--r--sql/sql_profile.cc2
-rw-r--r--sql/sql_repl.cc18
-rw-r--r--sql/sql_select.cc28
-rw-r--r--sql/sql_show.cc16
-rw-r--r--sql/sql_string.cc71
-rw-r--r--sql/sql_string.h10
-rw-r--r--sql/sql_table.cc2
-rw-r--r--sql/sql_yacc.yy4
-rw-r--r--sql/structs.h8
-rw-r--r--sql/sys_vars.cc7
-rw-r--r--sql/table.cc55
-rw-r--r--sql/table.h6
-rw-r--r--sql/thr_malloc.cc2
-rw-r--r--sql/tztime.cc10
-rw-r--r--sql/unireg.cc4
-rw-r--r--storage/archive/ha_archive.cc2
-rw-r--r--storage/csv/ha_tina.cc4
-rw-r--r--storage/federated/ha_federated.cc6
-rw-r--r--storage/federatedx/ha_federatedx.cc4
-rw-r--r--storage/heap/ha_heap.cc12
-rw-r--r--storage/heap/hp_create.c10
-rw-r--r--storage/heap/hp_test2.c4
-rw-r--r--storage/innobase/CMakeLists.txt59
-rw-r--r--storage/innobase/api/api0api.cc3859
-rw-r--r--storage/innobase/api/api0misc.cc206
-rw-r--r--storage/innobase/btr/btr0btr.cc591
-rw-r--r--storage/innobase/btr/btr0cur.cc728
-rw-r--r--storage/innobase/btr/btr0pcur.cc73
-rw-r--r--storage/innobase/btr/btr0sea.cc28
-rw-r--r--storage/innobase/buf/buf0buddy.cc2
-rw-r--r--storage/innobase/buf/buf0buf.cc294
-rw-r--r--storage/innobase/buf/buf0dblwr.cc161
-rw-r--r--storage/innobase/buf/buf0dump.cc11
-rw-r--r--storage/innobase/buf/buf0flu.cc707
-rw-r--r--storage/innobase/buf/buf0lru.cc607
-rw-r--r--storage/innobase/buf/buf0rea.cc35
-rw-r--r--storage/innobase/dict/dict0boot.cc49
-rw-r--r--storage/innobase/dict/dict0crea.cc577
-rw-r--r--storage/innobase/dict/dict0dict.cc1488
-rw-r--r--storage/innobase/dict/dict0load.cc857
-rw-r--r--storage/innobase/dict/dict0mem.cc195
-rw-r--r--storage/innobase/dict/dict0stats.cc2841
-rw-r--r--storage/innobase/dict/dict0stats_bg.cc392
-rw-r--r--storage/innobase/fil/fil0fil.cc2919
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc250
-rw-r--r--storage/innobase/fts/fts0ast.cc98
-rw-r--r--storage/innobase/fts/fts0blex.cc232
-rw-r--r--storage/innobase/fts/fts0blex.l2
-rw-r--r--storage/innobase/fts/fts0config.cc62
-rw-r--r--storage/innobase/fts/fts0fts.cc846
-rw-r--r--storage/innobase/fts/fts0opt.cc305
-rw-r--r--storage/innobase/fts/fts0pars.cc2
-rw-r--r--storage/innobase/fts/fts0que.cc571
-rw-r--r--storage/innobase/fts/fts0sql.cc12
-rw-r--r--storage/innobase/fts/fts0tlex.cc160
-rw-r--r--storage/innobase/fts/fts0tlex.l2
-rw-r--r--storage/innobase/ha/ha0ha.cc12
-rw-r--r--storage/innobase/ha/hash0hash.cc8
-rw-r--r--storage/innobase/handler/ha_innodb.cc4229
-rw-r--r--storage/innobase/handler/ha_innodb.h287
-rw-r--r--storage/innobase/handler/handler0alter.cc5559
-rw-r--r--storage/innobase/handler/i_s.cc1476
-rw-r--r--storage/innobase/handler/i_s.h56
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc469
-rw-r--r--storage/innobase/include/api0api.h1282
-rw-r--r--storage/innobase/include/api0misc.h78
-rw-r--r--storage/innobase/include/btr0btr.h152
-rw-r--r--storage/innobase/include/btr0btr.ic19
-rw-r--r--storage/innobase/include/btr0cur.h142
-rw-r--r--storage/innobase/include/btr0cur.ic13
-rw-r--r--storage/innobase/include/btr0pcur.h32
-rw-r--r--storage/innobase/include/btr0pcur.ic39
-rw-r--r--storage/innobase/include/btr0sea.h12
-rw-r--r--storage/innobase/include/btr0sea.ic2
-rw-r--r--storage/innobase/include/btr0types.h8
-rw-r--r--storage/innobase/include/buf0buf.h88
-rw-r--r--storage/innobase/include/buf0buf.ic18
-rw-r--r--storage/innobase/include/buf0dblwr.h5
-rw-r--r--storage/innobase/include/buf0flu.h96
-rw-r--r--storage/innobase/include/buf0flu.ic7
-rw-r--r--storage/innobase/include/buf0lru.h29
-rw-r--r--storage/innobase/include/buf0types.h29
-rw-r--r--storage/innobase/include/data0data.h142
-rw-r--r--storage/innobase/include/data0data.ic28
-rw-r--r--storage/innobase/include/data0type.h10
-rw-r--r--storage/innobase/include/data0type.ic31
-rw-r--r--storage/innobase/include/data0types.h4
-rw-r--r--storage/innobase/include/db0err.h39
-rw-r--r--storage/innobase/include/dict0boot.h70
-rw-r--r--storage/innobase/include/dict0boot.ic23
-rw-r--r--storage/innobase/include/dict0crea.h69
-rw-r--r--storage/innobase/include/dict0dict.h752
-rw-r--r--storage/innobase/include/dict0dict.ic328
-rw-r--r--storage/innobase/include/dict0load.h105
-rw-r--r--storage/innobase/include/dict0mem.h297
-rw-r--r--storage/innobase/include/dict0stats.h146
-rw-r--r--storage/innobase/include/dict0stats.ic250
-rw-r--r--storage/innobase/include/dict0stats_bg.h116
-rw-r--r--storage/innobase/include/dict0types.h25
-rw-r--r--storage/innobase/include/dyn0dyn.h7
-rw-r--r--storage/innobase/include/dyn0dyn.ic4
-rw-r--r--storage/innobase/include/fil0fil.h374
-rw-r--r--storage/innobase/include/fsp0fsp.h86
-rw-r--r--storage/innobase/include/fsp0fsp.ic148
-rw-r--r--storage/innobase/include/fts0ast.h76
-rw-r--r--storage/innobase/include/fts0fts.h183
-rw-r--r--storage/innobase/include/fts0priv.h195
-rw-r--r--storage/innobase/include/fts0priv.ic49
-rw-r--r--storage/innobase/include/fts0types.h52
-rw-r--r--storage/innobase/include/fts0types.ic40
-rw-r--r--storage/innobase/include/ha0ha.h5
-rw-r--r--storage/innobase/include/ha0storage.h2
-rw-r--r--storage/innobase/include/ha0storage.ic6
-rw-r--r--storage/innobase/include/ha_prototypes.h216
-rw-r--r--storage/innobase/include/handler0alter.h86
-rw-r--r--storage/innobase/include/hash0hash.h16
-rw-r--r--storage/innobase/include/hash0hash.ic4
-rw-r--r--storage/innobase/include/ibuf0ibuf.h65
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic31
-rw-r--r--storage/innobase/include/ibuf0types.h2
-rw-r--r--storage/innobase/include/lock0iter.h4
-rw-r--r--storage/innobase/include/lock0lock.h89
-rw-r--r--storage/innobase/include/lock0priv.h10
-rw-r--r--storage/innobase/include/lock0types.h4
-rw-r--r--storage/innobase/include/log0log.h43
-rw-r--r--storage/innobase/include/log0log.ic12
-rw-r--r--storage/innobase/include/log0recv.h41
-rw-r--r--storage/innobase/include/mach0data.h34
-rw-r--r--storage/innobase/include/mach0data.ic99
-rw-r--r--storage/innobase/include/mem0dbg.h2
-rw-r--r--storage/innobase/include/mem0mem.h12
-rw-r--r--storage/innobase/include/mem0mem.ic11
-rw-r--r--storage/innobase/include/mem0pool.h9
-rw-r--r--storage/innobase/include/mtr0log.h21
-rw-r--r--storage/innobase/include/mtr0mtr.h15
-rw-r--r--storage/innobase/include/mtr0mtr.ic4
-rw-r--r--storage/innobase/include/mtr0types.h2
-rw-r--r--storage/innobase/include/os0file.h101
-rw-r--r--storage/innobase/include/os0sync.h74
-rw-r--r--storage/innobase/include/os0sync.ic10
-rw-r--r--storage/innobase/include/page0cur.h32
-rw-r--r--storage/innobase/include/page0cur.ic29
-rw-r--r--storage/innobase/include/page0page.h24
-rw-r--r--storage/innobase/include/page0page.ic37
-rw-r--r--storage/innobase/include/page0types.h40
-rw-r--r--storage/innobase/include/page0zip.h64
-rw-r--r--storage/innobase/include/page0zip.ic73
-rw-r--r--storage/innobase/include/pars0pars.h61
-rw-r--r--storage/innobase/include/pars0sym.h8
-rw-r--r--storage/innobase/include/pars0types.h36
-rw-r--r--storage/innobase/include/que0que.h12
-rw-r--r--storage/innobase/include/que0types.h9
-rw-r--r--storage/innobase/include/read0read.h23
-rw-r--r--storage/innobase/include/read0read.ic93
-rw-r--r--storage/innobase/include/read0types.h4
-rw-r--r--storage/innobase/include/rem0cmp.h27
-rw-r--r--storage/innobase/include/rem0rec.h354
-rw-r--r--storage/innobase/include/rem0rec.ic29
-rw-r--r--storage/innobase/include/rem0types.h11
-rw-r--r--storage/innobase/include/row0ext.h2
-rw-r--r--storage/innobase/include/row0ftsort.h42
-rw-r--r--storage/innobase/include/row0import.h91
-rw-r--r--storage/innobase/include/row0import.ic25
-rw-r--r--storage/innobase/include/row0ins.h129
-rw-r--r--storage/innobase/include/row0log.h241
-rw-r--r--storage/innobase/include/row0log.ic84
-rw-r--r--storage/innobase/include/row0merge.h284
-rw-r--r--storage/innobase/include/row0mysql.h199
-rw-r--r--storage/innobase/include/row0purge.h19
-rw-r--r--storage/innobase/include/row0quiesce.h74
-rw-r--r--storage/innobase/include/row0quiesce.ic26
-rw-r--r--storage/innobase/include/row0row.h92
-rw-r--r--storage/innobase/include/row0row.ic27
-rw-r--r--storage/innobase/include/row0sel.h33
-rw-r--r--storage/innobase/include/row0types.h38
-rw-r--r--storage/innobase/include/row0uins.h8
-rw-r--r--storage/innobase/include/row0umod.h8
-rw-r--r--storage/innobase/include/row0undo.h2
-rw-r--r--storage/innobase/include/row0upd.h61
-rw-r--r--storage/innobase/include/row0upd.ic19
-rw-r--r--storage/innobase/include/row0vers.h18
-rw-r--r--storage/innobase/include/srv0mon.h142
-rw-r--r--storage/innobase/include/srv0srv.h263
-rw-r--r--storage/innobase/include/srv0start.h36
-rw-r--r--storage/innobase/include/sync0arr.h4
-rw-r--r--storage/innobase/include/sync0rw.h124
-rw-r--r--storage/innobase/include/sync0rw.ic118
-rw-r--r--storage/innobase/include/sync0sync.h75
-rw-r--r--storage/innobase/include/sync0sync.ic87
-rw-r--r--storage/innobase/include/sync0types.h5
-rw-r--r--storage/innobase/include/trx0i_s.h48
-rw-r--r--storage/innobase/include/trx0purge.h22
-rw-r--r--storage/innobase/include/trx0rec.h41
-rw-r--r--storage/innobase/include/trx0rec.ic2
-rw-r--r--storage/innobase/include/trx0roll.h44
-rw-r--r--storage/innobase/include/trx0rseg.h8
-rw-r--r--storage/innobase/include/trx0sys.h22
-rw-r--r--storage/innobase/include/trx0trx.h139
-rw-r--r--storage/innobase/include/trx0trx.ic13
-rw-r--r--storage/innobase/include/trx0types.h51
-rw-r--r--storage/innobase/include/trx0undo.h16
-rw-r--r--storage/innobase/include/trx0undo.ic15
-rw-r--r--storage/innobase/include/univ.i45
-rw-r--r--storage/innobase/include/usr0sess.h2
-rw-r--r--storage/innobase/include/usr0types.h2
-rw-r--r--storage/innobase/include/ut0bh.h4
-rw-r--r--storage/innobase/include/ut0counter.h203
-rw-r--r--storage/innobase/include/ut0crc32.h3
-rw-r--r--storage/innobase/include/ut0dbg.h4
-rw-r--r--storage/innobase/include/ut0list.h11
-rw-r--r--storage/innobase/include/ut0lst.h3
-rw-r--r--storage/innobase/include/ut0rbt.h20
-rw-r--r--storage/innobase/include/ut0ut.h38
-rw-r--r--storage/innobase/include/ut0vec.h12
-rw-r--r--storage/innobase/include/ut0vec.ic13
-rw-r--r--storage/innobase/include/ut0wqueue.h6
-rw-r--r--storage/innobase/lock/lock0lock.cc441
-rw-r--r--storage/innobase/lock/lock0wait.cc49
-rw-r--r--storage/innobase/log/log0log.cc270
-rw-r--r--storage/innobase/log/log0recv.cc470
-rw-r--r--storage/innobase/mem/mem0dbg.cc5
-rw-r--r--storage/innobase/mem/mem0pool.cc6
-rw-r--r--storage/innobase/mtr/mtr0log.cc73
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc107
-rw-r--r--storage/innobase/os/os0file.cc1877
-rw-r--r--storage/innobase/os/os0sync.cc45
-rw-r--r--storage/innobase/os/os0thread.cc6
-rw-r--r--storage/innobase/page/page0cur.cc87
-rw-r--r--storage/innobase/page/page0page.cc96
-rw-r--r--storage/innobase/page/page0zip.cc117
-rw-r--r--storage/innobase/pars/lexyy.cc927
-rw-r--r--storage/innobase/pars/pars0lex.l2
-rw-r--r--storage/innobase/pars/pars0opt.cc4
-rw-r--r--storage/innobase/pars/pars0pars.cc86
-rw-r--r--storage/innobase/pars/pars0sym.cc2
-rw-r--r--storage/innobase/que/que0que.cc4
-rw-r--r--storage/innobase/read/read0read.cc76
-rw-r--r--storage/innobase/rem/rem0cmp.cc278
-rw-r--r--storage/innobase/rem/rem0rec.cc332
-rw-r--r--storage/innobase/row/row0ext.cc2
-rw-r--r--storage/innobase/row/row0ftsort.cc216
-rw-r--r--storage/innobase/row/row0import.cc3806
-rw-r--r--storage/innobase/row/row0ins.cc1229
-rw-r--r--storage/innobase/row/row0log.cc3219
-rw-r--r--storage/innobase/row/row0merge.cc2428
-rw-r--r--storage/innobase/row/row0mysql.cc1670
-rw-r--r--storage/innobase/row/row0purge.cc443
-rw-r--r--storage/innobase/row/row0quiesce.cc702
-rw-r--r--storage/innobase/row/row0row.cc199
-rw-r--r--storage/innobase/row/row0sel.cc348
-rw-r--r--storage/innobase/row/row0uins.cc194
-rw-r--r--storage/innobase/row/row0umod.cc510
-rw-r--r--storage/innobase/row/row0undo.cc23
-rw-r--r--storage/innobase/row/row0upd.cc507
-rw-r--r--storage/innobase/row/row0vers.cc88
-rw-r--r--storage/innobase/srv/srv0conc.cc29
-rw-r--r--storage/innobase/srv/srv0mon.cc221
-rw-r--r--storage/innobase/srv/srv0srv.cc588
-rw-r--r--storage/innobase/srv/srv0start.cc1765
-rw-r--r--storage/innobase/sync/sync0arr.cc28
-rw-r--r--storage/innobase/sync/sync0rw.cc186
-rw-r--r--storage/innobase/sync/sync0sync.cc157
-rw-r--r--storage/innobase/trx/trx0i_s.cc20
-rw-r--r--storage/innobase/trx/trx0purge.cc78
-rw-r--r--storage/innobase/trx/trx0rec.cc167
-rw-r--r--storage/innobase/trx/trx0roll.cc64
-rw-r--r--storage/innobase/trx/trx0sys.cc68
-rw-r--r--storage/innobase/trx/trx0trx.cc333
-rw-r--r--storage/innobase/trx/trx0undo.cc28
-rw-r--r--storage/innobase/ut/ut0crc32.cc10
-rw-r--r--storage/innobase/ut/ut0mem.cc7
-rw-r--r--storage/innobase/ut/ut0rbt.cc2
-rw-r--r--storage/innobase/ut/ut0ut.cc98
-rw-r--r--storage/innobase/ut/ut0vec.cc4
-rw-r--r--storage/innobase/ut/ut0wqueue.cc2
-rw-r--r--storage/maria/ha_maria.cc12
-rw-r--r--storage/maria/ma_bitmap.c10
-rw-r--r--storage/maria/ma_blockrec.c8
-rw-r--r--storage/maria/ma_cache.c2
-rw-r--r--storage/maria/ma_check.c13
-rw-r--r--storage/maria/ma_checkpoint.c8
-rw-r--r--storage/maria/ma_close.c33
-rw-r--r--storage/maria/ma_commit.c4
-rw-r--r--storage/maria/ma_create.c16
-rw-r--r--storage/maria/ma_delete.c6
-rw-r--r--storage/maria/ma_dynrec.c2
-rw-r--r--storage/maria/ma_extra.c2
-rw-r--r--storage/maria/ma_ft_boolean_search.c4
-rw-r--r--storage/maria/ma_info.c2
-rw-r--r--storage/maria/ma_key_recover.c12
-rw-r--r--storage/maria/ma_loghandler.c6
-rw-r--r--storage/maria/ma_open.c41
-rw-r--r--storage/maria/ma_packrec.c4
-rw-r--r--storage/maria/ma_recovery.c2
-rw-r--r--storage/maria/ma_rt_mbr.c24
-rw-r--r--storage/maria/ma_sort.c8
-rw-r--r--storage/maria/ma_test1.c2
-rw-r--r--storage/maria/ma_test2.c2
-rw-r--r--storage/maria/ma_write.c4
-rw-r--r--storage/maria/maria_def.h1
-rw-r--r--storage/maria/maria_pack.c4
-rw-r--r--storage/maria/trnman.c2
-rwxr-xr-xstorage/maria/unittest/ma_test_all-t4
-rw-r--r--storage/myisam/ha_myisam.cc59
-rw-r--r--storage/myisam/mi_cache.c2
-rw-r--r--storage/myisam/mi_check.c16
-rw-r--r--storage/myisam/mi_close.c9
-rw-r--r--storage/myisam/mi_create.c20
-rw-r--r--storage/myisam/mi_dynrec.c27
-rw-r--r--storage/myisam/mi_extra.c2
-rw-r--r--storage/myisam/mi_open.c51
-rw-r--r--storage/myisam/mi_packrec.c4
-rw-r--r--storage/myisam/mi_search.c15
-rw-r--r--storage/myisam/mi_test1.c2
-rw-r--r--storage/myisam/mi_test2.c2
-rw-r--r--storage/myisam/myisamchk.c1
-rw-r--r--storage/myisam/myisamlog.c2
-rw-r--r--storage/myisam/myisampack.c7
-rw-r--r--storage/myisam/rt_mbr.c24
-rw-r--r--storage/myisam/sort.c8
-rw-r--r--storage/myisammrg/ha_myisammrg.cc2
-rw-r--r--storage/perfschema/CMakeLists.txt9
-rw-r--r--storage/perfschema/cursor_by_thread_connect_attr.cc71
-rw-r--r--storage/perfschema/cursor_by_thread_connect_attr.h81
-rw-r--r--storage/perfschema/gen_pfs_lex_token.cc4
-rw-r--r--storage/perfschema/ha_perfschema.cc20
-rw-r--r--storage/perfschema/ha_perfschema.h3
-rw-r--r--storage/perfschema/pfs.cc595
-rw-r--r--storage/perfschema/pfs_account.cc5
-rw-r--r--storage/perfschema/pfs_account.h4
-rw-r--r--storage/perfschema/pfs_atomic.h79
-rw-r--r--storage/perfschema/pfs_autosize.cc366
-rw-r--r--storage/perfschema/pfs_digest.cc205
-rw-r--r--storage/perfschema/pfs_digest.h54
-rw-r--r--storage/perfschema/pfs_engine_table.cc281
-rw-r--r--storage/perfschema/pfs_engine_table.h2
-rw-r--r--storage/perfschema/pfs_events.h2
-rw-r--r--storage/perfschema/pfs_events_waits.cc10
-rw-r--r--storage/perfschema/pfs_global.cc72
-rw-r--r--storage/perfschema/pfs_global.h15
-rw-r--r--storage/perfschema/pfs_host.cc5
-rw-r--r--storage/perfschema/pfs_host.h4
-rw-r--r--storage/perfschema/pfs_instr.cc328
-rw-r--r--storage/perfschema/pfs_instr.h98
-rw-r--r--storage/perfschema/pfs_instr_class.cc69
-rw-r--r--storage/perfschema/pfs_instr_class.h64
-rw-r--r--storage/perfschema/pfs_lock.h107
-rw-r--r--storage/perfschema/pfs_server.cc9
-rw-r--r--storage/perfschema/pfs_server.h113
-rw-r--r--storage/perfschema/pfs_setup_actor.cc10
-rw-r--r--storage/perfschema/pfs_setup_actor.h4
-rw-r--r--storage/perfschema/pfs_setup_object.cc10
-rw-r--r--storage/perfschema/pfs_setup_object.h4
-rw-r--r--storage/perfschema/pfs_stat.h167
-rw-r--r--storage/perfschema/pfs_timer.cc71
-rw-r--r--storage/perfschema/pfs_user.cc5
-rw-r--r--storage/perfschema/pfs_user.h4
-rw-r--r--storage/perfschema/pfs_visitor.cc89
-rw-r--r--storage/perfschema/table_esgs_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_esgs_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_esgs_global_by_event_name.cc3
-rw-r--r--storage/perfschema/table_esms_by_digest.cc24
-rw-r--r--storage/perfschema/table_esms_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_esms_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_esms_global_by_event_name.cc3
-rw-r--r--storage/perfschema/table_events_stages.cc4
-rw-r--r--storage/perfschema/table_events_stages.h2
-rw-r--r--storage/perfschema/table_events_statements.cc6
-rw-r--r--storage/perfschema/table_events_statements.h2
-rw-r--r--storage/perfschema/table_events_waits.cc7
-rw-r--r--storage/perfschema/table_events_waits.h2
-rw-r--r--storage/perfschema/table_events_waits_summary.cc10
-rw-r--r--storage/perfschema/table_ews_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_ews_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_ews_global_by_event_name.cc8
-rw-r--r--storage/perfschema/table_helper.cc27
-rw-r--r--storage/perfschema/table_helper.h4
-rw-r--r--storage/perfschema/table_host_cache.cc3
-rw-r--r--storage/perfschema/table_os_global_by_type.cc9
-rw-r--r--storage/perfschema/table_session_account_connect_attrs.cc70
-rw-r--r--storage/perfschema/table_session_account_connect_attrs.h50
-rw-r--r--storage/perfschema/table_session_connect.cc268
-rw-r--r--storage/perfschema/table_session_connect.h77
-rw-r--r--storage/perfschema/table_session_connect_attrs.cc43
-rw-r--r--storage/perfschema/table_session_connect_attrs.h47
-rw-r--r--storage/perfschema/table_setup_actors.cc41
-rw-r--r--storage/perfschema/table_setup_objects.cc39
-rw-r--r--storage/perfschema/table_socket_instances.cc4
-rw-r--r--storage/perfschema/table_socket_instances.h2
-rw-r--r--storage/perfschema/table_sync_instances.cc8
-rw-r--r--storage/perfschema/table_sync_instances.h4
-rw-r--r--storage/perfschema/table_threads.cc41
-rw-r--r--storage/perfschema/table_threads.h6
-rw-r--r--storage/perfschema/table_tiws_by_index_usage.cc12
-rw-r--r--storage/perfschema/unittest/CMakeLists.txt37
-rw-r--r--storage/perfschema/unittest/pfs-t.cc59
-rw-r--r--storage/perfschema/unittest/pfs_account-oom-t.cc1
-rw-r--r--storage/perfschema/unittest/pfs_connect_attr-t.cc345
-rw-r--r--storage/perfschema/unittest/pfs_host-oom-t.cc1
-rw-r--r--storage/perfschema/unittest/pfs_instr-oom-t.cc19
-rw-r--r--storage/perfschema/unittest/pfs_instr-t.cc42
-rw-r--r--storage/perfschema/unittest/pfs_instr_class-t.cc2
-rw-r--r--strings/ctype-big5.c4
-rw-r--r--strings/ctype-bin.c10
-rw-r--r--strings/ctype-gbk.c4
-rw-r--r--strings/ctype-mb.c6
-rw-r--r--strings/ctype-simple.c8
-rw-r--r--strings/ctype-tis620.c4
-rw-r--r--strings/ctype-uca.c2
-rw-r--r--strings/ctype-ucs2.c10
-rw-r--r--strings/ctype-utf8.c4
-rw-r--r--strings/ctype.c141
-rw-r--r--strings/decimal.c48
-rw-r--r--strings/dtoa.c6
-rw-r--r--strings/my_vsnprintf.c10
-rw-r--r--strings/str2int.c2
649 files changed, 87932 insertions, 22445 deletions
diff --git a/.bzrignore b/.bzrignore
index d190c8be0f7..6ff0b3d3505 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1067,6 +1067,7 @@ vio/viotest.cpp
ylwrap
zlib/*.ds?
sql-bench/test-table-elimination
+sql/share/bulgarian
sql/share/czech
sql/share/danish
sql/share/dutch
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh
index a4853c823ab..a51d13f75fb 100755
--- a/BUILD/SETUP.sh
+++ b/BUILD/SETUP.sh
@@ -164,8 +164,7 @@ valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max"
valgrind_configs="--with-valgrind"
#
# Used in -debug builds
-debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG"
-debug_cflags="$debug_cflags -DSAFE_MUTEX -DSAFEMALLOC"
+debug_cflags="-DEXTRA_DEBUG -DSAFE_MUTEX -DSAFEMALLOC"
error_inject="--with-error-inject "
#
# Base C++ flags for all builds
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b29c559415..201406084f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -143,7 +143,9 @@ INCLUDE(install_layout)
# Add macros
INCLUDE(character_sets)
+INCLUDE(cpu_info)
INCLUDE(zlib)
+INCLUDE(libevent)
INCLUDE(ssl)
INCLUDE(readline)
INCLUDE(libutils)
@@ -208,7 +210,7 @@ ENDFOREACH()
# Add safemutex for debug configurations, except on Windows
# (safemutex has never worked on Windows)
-IF(NOT WIN32)
+IF(WITH_DEBUG AND NOT WIN32 AND NOT WITH_INNODB_MEMCACHED)
FOREACH(LANG C CXX)
SET(CMAKE_${LANG}_FLAGS_DEBUG "${CMAKE_${LANG}_FLAGS_DEBUG} -DSAFE_MUTEX")
ENDFOREACH()
@@ -260,10 +262,12 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/include)
# Add bundled or system zlib.
MYSQL_CHECK_ZLIB_WITH_COMPRESS()
-# Optionally add bundled yassl/taocrypt or system openssl.
+# Add bundled yassl/taocrypt or system openssl.
MYSQL_CHECK_SSL()
# Add readline or libedit.
MYSQL_CHECK_READLINE()
+# Add libevent
+MYSQL_CHECK_LIBEVENT()
#
# Setup maintainer mode options. Platform checks are
@@ -301,6 +305,7 @@ ADD_SUBDIRECTORY(strings)
ADD_SUBDIRECTORY(vio)
ADD_SUBDIRECTORY(regex)
ADD_SUBDIRECTORY(mysys)
+ADD_SUBDIRECTORY(mysys_ssl)
ADD_SUBDIRECTORY(libmysql)
ADD_SUBDIRECTORY(client)
ADD_SUBDIRECTORY(extra)
diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt
index e4507f9c8ba..9fed5b4ea19 100644
--- a/client/CMakeLists.txt
+++ b/client/CMakeLists.txt
@@ -15,6 +15,7 @@
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/mysys_ssl
${ZLIB_INCLUDE_DIR}
${SSL_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/libmysql
@@ -25,6 +26,9 @@ INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_BINARY_DIR}
)
+## We will need libeay32.dll and ssleay32.dll when running client executables.
+COPY_OPENSSL_DLLS(copy_openssl_client)
+
ADD_DEFINITIONS(${SSL_DEFINES})
MYSQL_ADD_EXECUTABLE(mysql completion_hash.cc mysql.cc readline.cc
${CMAKE_SOURCE_DIR}/sql/sql_string.cc)
@@ -78,7 +82,7 @@ ENDIF(WIN32)
ADD_EXECUTABLE(async_example async_example.c)
TARGET_LINK_LIBRARIES(async_example mysqlclient)
-SET_TARGET_PROPERTIES (mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysqlslap mysql_plugin
+SET_TARGET_PROPERTIES (mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysqlslap mysql_plugin async_example
PROPERTIES HAS_CXX TRUE)
ADD_DEFINITIONS(-DHAVE_DLOPEN)
diff --git a/client/mysql.cc b/client/mysql.cc
index 331302757a6..bb57c43674e 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -3339,9 +3339,9 @@ print_table_data(MYSQL_RES *result)
{
uint length= column_names ? field->name_length : 0;
if (quick)
- length=max(length,field->length);
+ length= MY_MAX(length,field->length);
else
- length=max(length,field->max_length);
+ length= MY_MAX(length,field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))
length=4; // Room for "NULL"
field->max_length=length;
@@ -3361,8 +3361,8 @@ print_table_data(MYSQL_RES *result)
field->name,
field->name + name_length);
uint display_length= field->max_length + name_length - numcells;
- tee_fprintf(PAGER, " %-*s |",(int) min(display_length,
- MAX_COLUMN_LENGTH),
+ tee_fprintf(PAGER, " %-*s |",(int) MY_MIN(display_length,
+ MAX_COLUMN_LENGTH),
field->name);
num_flag[off]= IS_NUM(field->type);
}
@@ -3451,9 +3451,9 @@ static int get_field_disp_length(MYSQL_FIELD *field)
uint length= column_names ? field->name_length : 0;
if (quick)
- length= max(length, field->length);
+ length= MY_MAX(length, field->length);
else
- length= max(length, field->max_length);
+ length= MY_MAX(length, field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))
length= 4; /* Room for "NULL" */
@@ -3469,6 +3469,7 @@ static int get_field_disp_length(MYSQL_FIELD *field)
@returns The max number of characters in any row of this result
*/
+
static int get_result_width(MYSQL_RES *result)
{
unsigned int len= 0;
diff --git a/client/mysql_upgrade.c b/client/mysql_upgrade.c
index feaf23b15ba..9a3c56507e7 100644
--- a/client/mysql_upgrade.c
+++ b/client/mysql_upgrade.c
@@ -576,7 +576,7 @@ static int extract_variable_from_show(DYNAMIC_STRING* ds, char* value)
if ((value_end= strchr(value_start, '\n')) == NULL)
return 1; /* Unexpected result */
- strncpy(value, value_start, min(FN_REFLEN, value_end-value_start));
+ strncpy(value, value_start, MY_MIN(FN_REFLEN, value_end-value_start));
return 0;
}
diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc
index daa1115514e..a45c3b85596 100644
--- a/client/mysqladmin.cc
+++ b/client/mysqladmin.cc
@@ -23,7 +23,8 @@
#include <sys/stat.h>
#include <mysql.h>
#include <sql_common.h>
-#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
+#include <welcome_copyright_notice.h>
+#include <my_rnd.h>
#define ADMIN_VERSION "9.1"
#define MAX_MYSQL_VAR 512
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc
index 87198347b10..32c02d03f37 100644
--- a/client/mysqlbinlog.cc
+++ b/client/mysqlbinlog.cc
@@ -2310,7 +2310,7 @@ static Exit_status dump_local_log_entries(PRINT_EVENT_INFO *print_event_info,
my_off_t length,tmp;
for (length= start_position_mot ; length > 0 ; length-=tmp)
{
- tmp=min(length,sizeof(buff));
+ tmp= MY_MIN(length,sizeof(buff));
if (my_b_read(file, buff, (uint) tmp))
{
error("Failed reading from file.");
diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c
index 60778e12f06..f429837c1dd 100644
--- a/client/mysqlcheck.c
+++ b/client/mysqlcheck.c
@@ -787,8 +787,8 @@ static int handle_request_for_tables(char *tables, uint length)
org= ptr= strmov(strmov(query, op), " TABLE ");
ptr= fix_table_name(ptr, tables);
- strmake(table_name_buff, org, min((int) sizeof(table_name_buff)-1,
- (int) (ptr - org)));
+ strmake(table_name_buff, org, MY_MIN((int) sizeof(table_name_buff)-1,
+ (int) (ptr - org)));
table_name= table_name_buff;
ptr= strxmov(ptr, " ", options, NullS);
query_length= (uint) (ptr - query);
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index e2a939dbd73..9e4b9da54af 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -6489,9 +6489,9 @@ int read_line(char *buf, int size)
}
else if ((c == '{' &&
(!my_strnncoll_simple(charset_info, (const uchar*) "while", 5,
- (uchar*) buf, min(5, p - buf), 0) ||
+ (uchar*) buf, MY_MIN(5, p - buf), 0) ||
!my_strnncoll_simple(charset_info, (const uchar*) "if", 2,
- (uchar*) buf, min(2, p - buf), 0))))
+ (uchar*) buf, MY_MIN(2, p - buf), 0))))
{
/* Only if and while commands can be terminated by { */
*p++= c;
diff --git a/cmake/configure.pl b/cmake/configure.pl
index 51e83c2815c..d8e09a1bad9 100644
--- a/cmake/configure.pl
+++ b/cmake/configure.pl
@@ -150,6 +150,16 @@ foreach my $option (@ARGV)
$cmakeargs = $cmakeargs." -DWITH_ZLIB=system";
next;
}
+ if($option =~ /with-libevent=/)
+ {
+ $cmakeargs = $cmakeargs." -DWITH_LIBEVENT=system";
+ next;
+ }
+ if($option =~ /with-libevent/)
+ {
+ $cmakeargs = $cmakeargs." -DWITH_LIBEVENT=bundled";
+ next;
+ }
if($option =~ /with-ssl=/)
{
$cmakeargs = $cmakeargs." -DWITH_SSL=yes";
@@ -237,6 +247,16 @@ foreach my $option (@ARGV)
print("configure.pl : ignoring $option\n");
next;
}
+ if ($option =~ /with-client-ldflags/)
+ {
+ print("configure.pl : ignoring $option\n");
+ next;
+ }
+ if ($option =~ /with-mysqld-ldflags=/)
+ {
+ print("configure.pl : ignoring $option\n");
+ next;
+ }
$option = uc($option);
$option =~ s/-/_/g;
diff --git a/cmake/cpu_info.cmake b/cmake/cpu_info.cmake
new file mode 100644
index 00000000000..32b98142ace
--- /dev/null
+++ b/cmake/cpu_info.cmake
@@ -0,0 +1,30 @@
+# Copyright (c) 2009, 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Symbols with information about the CPU.
+
+FIND_PROGRAM(GETCONF getconf)
+MARK_AS_ADVANCED(GETCONF)
+
+IF(GETCONF)
+ EXECUTE_PROCESS(
+ COMMAND ${GETCONF} LEVEL1_DCACHE_LINESIZE
+ OUTPUT_VARIABLE CPU_LEVEL1_DCACHE_LINESIZE
+ )
+ENDIF()
+IF(CPU_LEVEL1_DCACHE_LINESIZE AND CPU_LEVEL1_DCACHE_LINESIZE GREATER 0)
+ELSE()
+ SET(CPU_LEVEL1_DCACHE_LINESIZE 64)
+ENDIF()
diff --git a/cmake/libevent.cmake b/cmake/libevent.cmake
new file mode 100644
index 00000000000..54498e1bb15
--- /dev/null
+++ b/cmake/libevent.cmake
@@ -0,0 +1,89 @@
+# Copyright (C) 2011 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+MACRO (MYSQL_USE_BUNDLED_LIBEVENT)
+ SET(LIBEVENT_LIBRARY event)
+ SET(LIBEVENT_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/libevent)
+ SET(LIBEVENT_FOUND TRUE)
+ SET(WITH_LIBEVENT "bundled" CACHE STRING "Use bundled libevent")
+ ADD_SUBDIRECTORY(libevent)
+ GET_TARGET_PROPERTY(src libevent SOURCES)
+ FOREACH(file ${src})
+ SET(LIBEVENT_SOURCES ${LIBEVENT_SOURCES} ${CMAKE_SOURCE_DIR}/libevent/${file})
+ ENDFOREACH()
+ENDMACRO()
+
+# MYSQL_CHECK_LIBEVENT
+#
+# Provides the following configure options:
+# WITH_LIBEVENT_BUNDLED
+#  If this is set, we use bundled libevent.
+#  If this is not set, search for system libevent;
+#  if system libevent is not found, use the bundled copy.
+# LIBEVENT_LIBRARIES, LIBEVENT_INCLUDE_DIR and LIBEVENT_SOURCES
+# are set after this macro has run
+
+MACRO (MYSQL_CHECK_LIBEVENT)
+
+ IF (NOT WITH_LIBEVENT)
+ SET(WITH_LIBEVENT "bundled" CACHE STRING "By default use bundled libevent on this platform")
+ ENDIF()
+
+ IF(WITH_LIBEVENT STREQUAL "bundled")
+ MYSQL_USE_BUNDLED_LIBEVENT()
+ ELSEIF(WITH_LIBEVENT STREQUAL "system" OR WITH_LIBEVENT STREQUAL "yes")
+ SET(LIBEVENT_FIND_QUIETLY TRUE)
+
+ IF (NOT LIBEVENT_INCLUDE_PATH)
+ set(LIBEVENT_INCLUDE_PATH /usr/local/include /opt/local/include)
+ ENDIF()
+
+ find_path(LIBEVENT_INCLUDE_DIR event.h PATHS ${LIBEVENT_INCLUDE_PATH})
+
+ if (NOT LIBEVENT_INCLUDE_DIR)
+ MESSAGE(SEND_ERROR "Cannot find appropriate event.h in /usr/local/include or /opt/local/include. Use bundled libevent")
+ endif()
+
+ IF (NOT LIBEVENT_LIB_PATHS)
+ set(LIBEVENT_LIB_PATHS /usr/local/lib /opt/local/lib)
+ ENDIF()
+
+ find_library(LIBEVENT_LIB event PATHS ${LIBEVENT_LIB_PATHS})
+
+ if (NOT LIBEVENT_LIB)
+ MESSAGE(SEND_ERROR "Cannot find appropriate event lib in /usr/local/lib or /opt/local/lib. Use bundled libevent")
+ endif()
+
+ IF (LIBEVENT_LIB AND LIBEVENT_INCLUDE_DIR)
+ set(LIBEVENT_FOUND TRUE)
+ set(LIBEVENT_LIBS ${LIBEVENT_LIB})
+ ELSE()
+ set(LIBEVENT_FOUND FALSE)
+ ENDIF()
+
+ IF(LIBEVENT_FOUND)
+ SET(LIBEVENT_SOURCES "")
+ SET(LIBEVENT_LIBRARIES ${LIBEVENT_LIBS})
+ SET(LIBEVENT_INCLUDE_DIRS ${LIBEVENT_INCLUDE_DIR})
+ SET(LIBEVENT_DEFINES "-DHAVE_LIBEVENT")
+ ELSE()
+ IF(WITH_LIBEVENT STREQUAL "system")
+ MESSAGE(SEND_ERROR "Cannot find appropriate system libraries for libevent. Use bundled libevent")
+ ENDIF()
+ MYSQL_USE_BUNDLED_LIBEVENT()
+ ENDIF()
+
+ ENDIF()
+ENDMACRO()
diff --git a/cmake/libutils.cmake b/cmake/libutils.cmake
index 7c13df05ca4..e161b67d25f 100644
--- a/cmake/libutils.cmake
+++ b/cmake/libutils.cmake
@@ -304,12 +304,15 @@ FUNCTION(GET_DEPENDEND_OS_LIBS target result)
SET(${result} ${ret} PARENT_SCOPE)
ENDFUNCTION()
-MACRO(RESTRICT_SYMBOL_EXPORTS target)
+# We try to hide the symbols in yassl/zlib to avoid name clashes with
+# other libraries like openssl.
+FUNCTION(RESTRICT_SYMBOL_EXPORTS target)
SET(VISIBILITY_HIDDEN_FLAG)
IF(CMAKE_COMPILER_IS_GNUCXX AND UNIX)
CHECK_C_COMPILER_FLAG("-fvisibility=hidden" HAVE_VISIBILITY_HIDDEN)
IF(HAVE_VISIBILITY_HIDDEN)
+ MESSAGE(STATUS "HAVE_VISIBILITY_HIDDEN")
SET(VISIBILITY_HIDDEN_FLAG "-fvisibility=hidden")
ENDIF()
ENDIF()
@@ -327,5 +330,4 @@ MACRO(RESTRICT_SYMBOL_EXPORTS target)
SET_TARGET_PROPERTIES(${target} PROPERTIES
COMPILE_FLAGS "${COMPILE_FLAGS} ${VISIBILITY_HIDDEN_FLAG}")
ENDIF()
-
-ENDMACRO()
+ENDFUNCTION()
diff --git a/cmake/ssl.cmake b/cmake/ssl.cmake
index cabff530b47..a74ebc219e9 100644
--- a/cmake/ssl.cmake
+++ b/cmake/ssl.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,80 +13,222 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+# We support different versions of SSL:
+# - "bundled" uses source code in <source dir>/extra/yassl
+# - "system" (typically) uses headers/libraries in /usr/lib and /usr/lib64
+# - a custom installation of openssl can be used like this
+# - cmake -DCMAKE_PREFIX_PATH=</path/to/custom/openssl> -DWITH_SSL="system"
+# or
+# - cmake -DWITH_SSL=</path/to/custom/openssl>
+#
+# The default value for WITH_SSL is "bundled"
+# set in cmake/build_configurations/feature_set.cmake
+#
+# For custom build/install of openssl, see the accompanying README and
+# INSTALL* files. When building with gcc, you must build the shared libraries
+# (in addition to the static ones):
+# ./config --prefix=</path/to/custom/openssl> --shared; make; make install
+# On some platforms (mac) you need to choose 32/64 bit architecture.
+# Build/Install of openssl on windows is slightly different: you need to run
+# perl and nmake. You might also need to
+# 'set path=</path/to/custom/openssl>\bin;%PATH%'
+# in order to find the .dll files at runtime.
+
+SET(WITH_SSL_DOC "bundled (use yassl)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, yes (prefer os library if present, otherwise use bundled)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, system (use os library)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, </path/to/custom/installation>")
+
MACRO (CHANGE_SSL_SETTINGS string)
- SET(WITH_SSL ${string} CACHE STRING "Options are: no bundled yes(prefer os library if present otherwise use bundled) system(use os library)" FORCE)
+ SET(WITH_SSL ${string} CACHE STRING ${WITH_SSL_DOC} FORCE)
ENDMACRO()
MACRO (MYSQL_USE_BUNDLED_SSL)
SET(INC_DIRS
- ${CMAKE_SOURCE_DIR}/extra/yassl/include
- ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/include
+ ${CMAKE_SOURCE_DIR}/extra/yassl/include
+ ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/include
)
SET(SSL_LIBRARIES yassl taocrypt)
SET(SSL_INCLUDE_DIRS ${INC_DIRS})
SET(SSL_INTERNAL_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/mySTL)
- SET(SSL_DEFINES "-DHAVE_YASSL -DYASSL_PURE_C -DYASSL_PREFIX -DHAVE_OPENSSL -DMULTI_THREADED")
+ SET(SSL_DEFINES "-DHAVE_YASSL -DYASSL_PREFIX -DHAVE_OPENSSL -DMULTI_THREADED")
CHANGE_SSL_SETTINGS("bundled")
- #Remove -fno-implicit-templates
- #(yassl sources cannot be compiled with it)
- SET(SAVE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
- IF(CMAKE_CXX_FLAGS)
- STRING(REPLACE "-fno-implicit-templates" "" CMAKE_CXX_FLAGS
- ${CMAKE_CXX_FLAGS})
- ENDIF()
ADD_SUBDIRECTORY(extra/yassl)
ADD_SUBDIRECTORY(extra/yassl/taocrypt)
- SET(CMAKE_CXX_FLAGS ${SAVE_CXX_FLAGS})
GET_TARGET_PROPERTY(src yassl SOURCES)
FOREACH(file ${src})
SET(SSL_SOURCES ${SSL_SOURCES} ${CMAKE_SOURCE_DIR}/extra/yassl/${file})
ENDFOREACH()
GET_TARGET_PROPERTY(src taocrypt SOURCES)
FOREACH(file ${src})
- SET(SSL_SOURCES ${SSL_SOURCES} ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/${file})
+ SET(SSL_SOURCES ${SSL_SOURCES}
+ ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/${file})
ENDFOREACH()
ENDMACRO()
# MYSQL_CHECK_SSL
#
# Provides the following configure options:
-# WITH_SSL=[yes|no|bundled]
+# WITH_SSL=[yes|bundled|system|<path/to/custom/installation>]
MACRO (MYSQL_CHECK_SSL)
IF(NOT WITH_SSL)
IF(WIN32)
CHANGE_SSL_SETTINGS("bundled")
- ELSE()
- CHANGE_SSL_SETTINGS("no")
ENDIF()
ENDIF()
+ # See if WITH_SSL is of the form </path/to/custom/installation>
+ FILE(GLOB WITH_SSL_HEADER ${WITH_SSL}/include/openssl/ssl.h)
+ IF (WITH_SSL_HEADER)
+ SET(WITH_SSL_PATH ${WITH_SSL} CACHE PATH "path to custom SSL installation")
+ ENDIF()
+
IF(WITH_SSL STREQUAL "bundled")
MYSQL_USE_BUNDLED_SSL()
- ELSEIF(WITH_SSL STREQUAL "system" OR WITH_SSL STREQUAL "yes")
- # Check for system library
- SET(OPENSSL_FIND_QUIETLY TRUE)
- INCLUDE(FindOpenSSL)
- FIND_LIBRARY(CRYPTO_LIBRARY crypto)
- MARK_AS_ADVANCED(CRYPTO_LIBRARY)
+ # Reset some variables, in case we switch from /path/to/ssl to "bundled".
+ IF (WITH_SSL_PATH)
+ UNSET(WITH_SSL_PATH)
+ UNSET(WITH_SSL_PATH CACHE)
+ ENDIF()
+ IF (OPENSSL_ROOT_DIR)
+ UNSET(OPENSSL_ROOT_DIR)
+ UNSET(OPENSSL_ROOT_DIR CACHE)
+ ENDIF()
+ IF (OPENSSL_INCLUDE_DIR)
+ UNSET(OPENSSL_INCLUDE_DIR)
+ UNSET(OPENSSL_INCLUDE_DIR CACHE)
+ ENDIF()
+ IF (WIN32 AND OPENSSL_APPLINK_C)
+ UNSET(OPENSSL_APPLINK_C)
+ UNSET(OPENSSL_APPLINK_C CACHE)
+ ENDIF()
+ IF (OPENSSL_LIBRARIES)
+ UNSET(OPENSSL_LIBRARIES)
+ UNSET(OPENSSL_LIBRARIES CACHE)
+ ENDIF()
+ ELSEIF(WITH_SSL STREQUAL "system" OR
+ WITH_SSL STREQUAL "yes" OR
+ WITH_SSL_PATH
+ )
+ # First search in WITH_SSL_PATH.
+ FIND_PATH(OPENSSL_ROOT_DIR
+ NAMES include/openssl/ssl.h
+ NO_CMAKE_PATH
+ NO_CMAKE_ENVIRONMENT_PATH
+ HINTS ${WITH_SSL_PATH}
+ )
+ # Then search in standard places (if not found above).
+ FIND_PATH(OPENSSL_ROOT_DIR
+ NAMES include/openssl/ssl.h
+ )
+
+ FIND_PATH(OPENSSL_INCLUDE_DIR
+ NAMES openssl/ssl.h
+ HINTS ${OPENSSL_ROOT_DIR}/include
+ )
+
+ IF (WIN32)
+ FIND_FILE(OPENSSL_APPLINK_C
+ NAMES openssl/applink.c
+ HINTS ${OPENSSL_ROOT_DIR}/include
+ )
+ MESSAGE(STATUS "OPENSSL_APPLINK_C ${OPENSSL_APPLINK_C}")
+ ENDIF()
+
+ # On mac this list is <.dylib;.so;.a>
+ # We prefer static libraries, so we revert it here.
+ LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+ MESSAGE(STATUS "suffixes <${CMAKE_FIND_LIBRARY_SUFFIXES}>")
+ FIND_LIBRARY(OPENSSL_LIBRARIES
+ NAMES ssl ssleay32 ssleay32MD
+ HINTS ${OPENSSL_ROOT_DIR}/lib)
+ FIND_LIBRARY(CRYPTO_LIBRARY
+ NAMES crypto libeay32
+ HINTS ${OPENSSL_ROOT_DIR}/lib)
+ LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+
+ # Verify version number. Version information looks like:
+ # #define OPENSSL_VERSION_NUMBER 0x1000103fL
+ # Encoded as MNNFFPPS: major minor fix patch status
+ FILE(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h"
+ OPENSSL_VERSION_NUMBER
+ REGEX "^#define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x[0-9].*"
+ )
+ STRING(REGEX REPLACE
+ "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9]).*$" "\\1"
+ OPENSSL_MAJOR_VERSION "${OPENSSL_VERSION_NUMBER}"
+ )
+
+ IF(OPENSSL_INCLUDE_DIR AND
+ OPENSSL_LIBRARIES AND
+ CRYPTO_LIBRARY AND
+ OPENSSL_MAJOR_VERSION STREQUAL "1"
+ )
+ SET(OPENSSL_FOUND TRUE)
+ ELSE()
+ SET(OPENSSL_FOUND FALSE)
+ ENDIF()
+
+ MESSAGE(STATUS "OPENSSL_INCLUDE_DIR = ${OPENSSL_INCLUDE_DIR}")
+ MESSAGE(STATUS "OPENSSL_LIBRARIES = ${OPENSSL_LIBRARIES}")
+ MESSAGE(STATUS "CRYPTO_LIBRARY = ${CRYPTO_LIBRARY}")
+ MESSAGE(STATUS "OPENSSL_MAJOR_VERSION = ${OPENSSL_MAJOR_VERSION}")
+
INCLUDE(CheckSymbolExists)
SET(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
CHECK_SYMBOL_EXISTS(SHA512_DIGEST_LENGTH "openssl/sha.h"
HAVE_SHA512_DIGEST_LENGTH)
- SET(CMAKE_REQUIRED_INCLUDES)
- IF(OPENSSL_FOUND AND CRYPTO_LIBRARY AND HAVE_SHA512_DIGEST_LENGTH)
+ IF(OPENSSL_FOUND AND HAVE_SHA512_DIGEST_LENGTH)
SET(SSL_SOURCES "")
SET(SSL_LIBRARIES ${OPENSSL_LIBRARIES} ${CRYPTO_LIBRARY})
+ IF(CMAKE_SYSTEM_NAME MATCHES "SunOS")
+ SET(SSL_LIBRARIES ${SSL_LIBRARIES} ${LIBSOCKET})
+ ENDIF()
+ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
+ SET(SSL_LIBRARIES ${SSL_LIBRARIES} ${LIBDL})
+ ENDIF()
+ MESSAGE(STATUS "SSL_LIBRARIES = ${SSL_LIBRARIES}")
SET(SSL_INCLUDE_DIRS ${OPENSSL_INCLUDE_DIR})
SET(SSL_INTERNAL_INCLUDE_DIRS "")
SET(SSL_DEFINES "-DHAVE_OPENSSL")
- CHANGE_SSL_SETTINGS("system")
ELSE()
IF(WITH_SSL STREQUAL "system")
MESSAGE(SEND_ERROR "Cannot find appropriate system libraries for SSL. Use WITH_SSL=bundled to enable SSL support")
ENDIF()
MYSQL_USE_BUNDLED_SSL()
ENDIF()
- ELSEIF(NOT WITH_SSL STREQUAL "no")
- MESSAGE(SEND_ERROR "Wrong option for WITH_SSL. Valid values are : yes, no, bundled")
+ ELSE()
+ MESSAGE(SEND_ERROR
+ "Wrong option for WITH_SSL. Valid values are : "${WITH_SSL_DOC})
+ ENDIF()
+ENDMACRO()
+
+
+# Many executables will depend on libeay32.dll and ssleay32.dll at runtime.
+# In order to ensure we find the right version(s), we copy them into
+# the same directory as the executables.
+# NOTE: Using dlls will likely crash in malloc/free,
+# see INSTALL.W32 which comes with the openssl sources.
+# So we should be linking static versions of the libraries.
+MACRO (COPY_OPENSSL_DLLS target_name)
+ IF (WIN32 AND WITH_SSL_PATH)
+ GET_FILENAME_COMPONENT(CRYPTO_NAME "${CRYPTO_LIBRARY}" NAME_WE)
+ GET_FILENAME_COMPONENT(OPENSSL_NAME "${OPENSSL_LIBRARIES}" NAME_WE)
+ FILE(GLOB HAVE_CRYPTO_DLL "${WITH_SSL_PATH}/bin/${CRYPTO_NAME}.dll")
+ FILE(GLOB HAVE_OPENSSL_DLL "${WITH_SSL_PATH}/bin/${OPENSSL_NAME}.dll")
+ IF (HAVE_CRYPTO_DLL AND HAVE_OPENSSL_DLL)
+ ADD_CUSTOM_COMMAND(OUTPUT ${target_name}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${WITH_SSL_PATH}/bin/${CRYPTO_NAME}.dll"
+ "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${CRYPTO_NAME}.dll"
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${WITH_SSL_PATH}/bin/${OPENSSL_NAME}.dll"
+ "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${OPENSSL_NAME}.dll"
+ )
+ ADD_CUSTOM_TARGET(${target_name} ALL)
+ ENDIF()
ENDIF()
ENDMACRO()
diff --git a/dbug/dbug.c b/dbug/dbug.c
index b285b32fa17..9ec8044eaf1 100644
--- a/dbug/dbug.c
+++ b/dbug/dbug.c
@@ -1332,7 +1332,7 @@ void _db_dump_(uint _line_, const char *keyword,
if (TRACING)
{
Indent(cs, cs->level + 1);
- pos= min(max(cs->level-cs->stack->sub_level,0)*INDENT,80);
+ pos= MY_MIN(MY_MAX(cs->level-cs->stack->sub_level,0)*INDENT,80);
}
else
{
@@ -1737,7 +1737,7 @@ static void Indent(CODE_STATE *cs, int indent)
{
int count;
- indent= max(indent-1-cs->stack->sub_level,0)*INDENT;
+ indent= MY_MAX(indent-1-cs->stack->sub_level,0)*INDENT;
for (count= 0; count < indent ; count++)
{
if ((count % INDENT) == 0)
diff --git a/extra/comp_err.c b/extra/comp_err.c
index fb51377ddc5..bf757122957 100644
--- a/extra/comp_err.c
+++ b/extra/comp_err.c
@@ -33,8 +33,9 @@
#include <assert.h>
#include <my_dir.h>
-#define MAX_ROWS 1000
+#define MAX_ROWS 2000
#define HEADER_LENGTH 32 /* Length of header in errmsg.sys */
+#define ERRMSG_VERSION 3 /* Version number of errmsg.sys */
#define DEFAULT_CHARSET_DIR "../sql/share/charsets"
#define ER_PREFIX "ER_"
#define ER_PREFIX2 "MARIA_ER_"
@@ -50,9 +51,9 @@ static char *default_dbug_option= (char*) "d:t:O,/tmp/comp_err.trace";
#endif
/* Header for errmsg.sys files */
-uchar file_head[]= { 254, 254, 2, 2 };
+uchar file_head[]= { 254, 254, 2, ERRMSG_VERSION };
/* Store positions to each error message row to store in errmsg.sys header */
-uint file_pos[MAX_ROWS];
+uint file_pos[MAX_ROWS+1];
const char *empty_string= ""; /* For empty states */
/*
@@ -379,9 +380,11 @@ static int create_sys_files(struct languages *lang_head,
if (my_fwrite(to, (uchar*) head, HEADER_LENGTH, MYF(MY_WME | MY_FNABP)))
goto err;
+ file_pos[row_count]= (ftell(to) - start_pos);
for (i= 0; i < row_count; i++)
{
- int2store(head, file_pos[i]);
+ /* Store length of each string */
+ int2store(head, file_pos[i+1] - file_pos[i]);
if (my_fwrite(to, (uchar*) head, 2, MYF(MY_WME | MY_FNABP)))
goto err;
}
diff --git a/extra/my_print_defaults.c b/extra/my_print_defaults.c
index 8a16e677cb9..5b661c0d04b 100644
--- a/extra/my_print_defaults.c
+++ b/extra/my_print_defaults.c
@@ -26,7 +26,7 @@
#include <my_sys.h>
#include <m_string.h>
#include <my_getopt.h>
-
+#include <my_default.h>
const char *config_file="my"; /* Default config file */
uint verbose= 0, opt_defaults_file_used= 0;
diff --git a/include/big_endian.h b/include/big_endian.h
new file mode 100644
index 00000000000..021b6abc383
--- /dev/null
+++ b/include/big_endian.h
@@ -0,0 +1,82 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Data in big-endian format.
+*/
+#define float4store(T,A) do { *(T)= ((uchar *) &A)[3];\
+ *((T)+1)=(char) ((uchar *) &A)[2];\
+ *((T)+2)=(char) ((uchar *) &A)[1];\
+ *((T)+3)=(char) ((uchar *) &A)[0]; } while(0)
+
+#define float4get(V,M) do { float def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[3];\
+ ((uchar*) &def_temp)[1]=(M)[2];\
+ ((uchar*) &def_temp)[2]=(M)[1];\
+ ((uchar*) &def_temp)[3]=(M)[0];\
+ (V)=def_temp; } while(0)
+
+#define float8store(T,V) do { *(T)= ((uchar *) &V)[7];\
+ *((T)+1)=(char) ((uchar *) &V)[6];\
+ *((T)+2)=(char) ((uchar *) &V)[5];\
+ *((T)+3)=(char) ((uchar *) &V)[4];\
+ *((T)+4)=(char) ((uchar *) &V)[3];\
+ *((T)+5)=(char) ((uchar *) &V)[2];\
+ *((T)+6)=(char) ((uchar *) &V)[1];\
+ *((T)+7)=(char) ((uchar *) &V)[0]; } while(0)
+
+#define float8get(V,M) do { double def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[7];\
+ ((uchar*) &def_temp)[1]=(M)[6];\
+ ((uchar*) &def_temp)[2]=(M)[5];\
+ ((uchar*) &def_temp)[3]=(M)[4];\
+ ((uchar*) &def_temp)[4]=(M)[3];\
+ ((uchar*) &def_temp)[5]=(M)[2];\
+ ((uchar*) &def_temp)[6]=(M)[1];\
+ ((uchar*) &def_temp)[7]=(M)[0];\
+ (V) = def_temp; } while(0)
+
+#define ushortget(V,M) do { V = (uint16) (((uint16) ((uchar) (M)[1]))+\
+ ((uint16) ((uint16) (M)[0]) << 8)); } while(0)
+#define shortget(V,M) do { V = (short) (((short) ((uchar) (M)[1]))+\
+ ((short) ((short) (M)[0]) << 8)); } while(0)
+#define longget(V,M) do { int32 def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[0];\
+ ((uchar*) &def_temp)[1]=(M)[1];\
+ ((uchar*) &def_temp)[2]=(M)[2];\
+ ((uchar*) &def_temp)[3]=(M)[3];\
+ (V)=def_temp; } while(0)
+#define ulongget(V,M) do { uint32 def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[0];\
+ ((uchar*) &def_temp)[1]=(M)[1];\
+ ((uchar*) &def_temp)[2]=(M)[2];\
+ ((uchar*) &def_temp)[3]=(M)[3];\
+ (V)=def_temp; } while(0)
+#define shortstore(T,A) do { uint def_temp=(uint) (A) ;\
+ *(((char*)T)+1)=(char)(def_temp); \
+ *(((char*)T)+0)=(char)(def_temp >> 8); } while(0)
+#define longstore(T,A) do { *(((char*)T)+3)=((A));\
+ *(((char*)T)+2)=(((A) >> 8));\
+ *(((char*)T)+1)=(((A) >> 16));\
+ *(((char*)T)+0)=(((A) >> 24)); } while(0)
+
+#define floatget(V,M) memcpy(&V, (M), sizeof(float))
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define doublestore(T,V) memcpy((T), (void*) &V, sizeof(double))
+#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
+#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
diff --git a/include/byte_order_generic.h b/include/byte_order_generic.h
new file mode 100644
index 00000000000..d4ac27eeb9c
--- /dev/null
+++ b/include/byte_order_generic.h
@@ -0,0 +1,95 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Endianness-independent definitions for architectures other
+ than the x86 architecture.
+*/
+#define sint2korr(A) (int16) (((int16) ((uchar) (A)[0])) +\
+ ((int16) ((int16) (A)[1]) << 8))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (int32) (((int32) ((uchar) (A)[0])) +\
+ (((int32) ((uchar) (A)[1]) << 8)) +\
+ (((int32) ((uchar) (A)[2]) << 16)) +\
+ (((int32) ((int16) (A)[3]) << 24)))
+#define sint8korr(A) (longlong) uint8korr(A)
+#define uint2korr(A) (uint16) (((uint16) ((uchar) (A)[0])) +\
+ ((uint16) ((uchar) (A)[1]) << 8))
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#define uint4korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) (((uint32) ((uchar) (A)[4])) +\
+ (((uint32) ((uchar) (A)[5])) << 8) +\
+ (((uint32) ((uchar) (A)[6])) << 16) +\
+ (((uint32) ((uchar) (A)[7])) << 24))) <<\
+ 32))
+#define int2store(T,A) do { uint def_temp= (uint) (A) ;\
+ *((uchar*) (T))= (uchar)(def_temp); \
+ *((uchar*) (T)+1)=(uchar)((def_temp >> 8)); \
+ } while(0)
+#define int3store(T,A) do { /*lint -save -e734 */\
+ *((uchar*)(T))=(uchar) ((A));\
+ *((uchar*) (T)+1)=(uchar) (((A) >> 8));\
+ *((uchar*)(T)+2)=(uchar) (((A) >> 16)); \
+ /*lint -restore */} while(0)
+#define int4store(T,A) do { *((char *)(T))=(char) ((A));\
+ *(((char *)(T))+1)=(char) (((A) >> 8));\
+ *(((char *)(T))+2)=(char) (((A) >> 16));\
+ *(((char *)(T))+3)=(char) (((A) >> 24));\
+ } while(0)
+#define int5store(T,A) do { *((char *)(T))= (char)((A)); \
+ *(((char *)(T))+1)= (char)(((A) >> 8)); \
+ *(((char *)(T))+2)= (char)(((A) >> 16)); \
+ *(((char *)(T))+3)= (char)(((A) >> 24)); \
+ *(((char *)(T))+4)= (char)(((A) >> 32)); \
+ } while(0)
+#define int6store(T,A) do { *((char *)(T))= (char)((A)); \
+ *(((char *)(T))+1)= (char)(((A) >> 8)); \
+ *(((char *)(T))+2)= (char)(((A) >> 16)); \
+ *(((char *)(T))+3)= (char)(((A) >> 24)); \
+ *(((char *)(T))+4)= (char)(((A) >> 32)); \
+ *(((char *)(T))+5)= (char)(((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) do { uint def_temp= (uint) (A), \
+ def_temp2= (uint) ((A) >> 32); \
+ int4store((T),def_temp); \
+ int4store((T+4),def_temp2);\
+ } while(0)
diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h
new file mode 100644
index 00000000000..0a71a17829b
--- /dev/null
+++ b/include/byte_order_generic_x86.h
@@ -0,0 +1,97 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Optimized function-like macros for the x86 architecture (_WIN32 included).
+*/
+#define sint2korr(A) (*((const int16 *) (A)))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (*((const long *) (A)))
+#define uint2korr(A) (*((const uint16 *) (A)))
+
+/*
+ Attention: Please, note, uint3korr reads 4 bytes (not 3)!
+ It means, that you have to provide enough allocated space.
+*/
+#if defined(HAVE_valgrind) && !defined(_WIN32)
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#else
+#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF)
+#endif
+
+#define uint4korr(A) (*((const uint32 *) (A)))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) (*((const ulonglong *) (A)))
+#define sint8korr(A) (*((const longlong *) (A)))
+
+#define int2store(T,A) *((uint16*) (T))= (uint16) (A)
+#define int3store(T,A) do { *(T)= (uchar) ((A));\
+ *(T+1)=(uchar) (((uint) (A) >> 8));\
+ *(T+2)=(uchar) (((A) >> 16));\
+ } while (0)
+#define int4store(T,A) *((long *) (T))= (long) (A)
+#define int5store(T,A) do { *(T)= (uchar)((A));\
+ *((T)+1)=(uchar) (((A) >> 8));\
+ *((T)+2)=(uchar) (((A) >> 16));\
+ *((T)+3)=(uchar) (((A) >> 24));\
+ *((T)+4)=(uchar) (((A) >> 32));\
+ } while(0)
+#define int6store(T,A) do { *(T)= (uchar)((A)); \
+ *((T)+1)=(uchar) (((A) >> 8)); \
+ *((T)+2)=(uchar) (((A) >> 16)); \
+ *((T)+3)=(uchar) (((A) >> 24)); \
+ *((T)+4)=(uchar) (((A) >> 32)); \
+ *((T)+5)=(uchar) (((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) *((ulonglong *) (T))= (ulonglong) (A)
+typedef union {
+ double v;
+ long m[2];
+} doubleget_union;
+#define doubleget(V,M) \
+do { doubleget_union _tmp; \
+ _tmp.m[0] = *((const long*)(M)); \
+ _tmp.m[1] = *(((const long*) (M))+1); \
+ (V) = _tmp.v; } while(0)
+#define doublestore(T,V) \
+do { *((long *) T) = ((const doubleget_union *)&V)->m[0]; \
+ *(((long *) T)+1) = ((const doubleget_union *)&V)->m[1]; \
+ } while (0)
+#define float4get(V,M) \
+do { *((float *) &(V)) = *((const float*) (M)); } while(0)
+#define float8get(V,M) doubleget((V),(M))
+#define float4store(V,M) memcpy((uchar*)(V), (uchar*)(&M), sizeof(float))
+#define floatstore(T,V) memcpy((uchar*)(T), (uchar*)(&V), sizeof(float))
+#define floatget(V,M) memcpy((uchar*)(&V),(uchar*) (M), sizeof(float))
+#define float8store(V,M) doublestore((V),(M))
diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h
new file mode 100644
index 00000000000..877c1574dfa
--- /dev/null
+++ b/include/byte_order_generic_x86_64.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+  Optimized function-like macros for the x86_64 architecture.
+*/
+#define sint2korr(A) (int16) (*((int16 *) (A)))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (int32) (*((int32 *) (A)))
+#define uint2korr(A) (uint16) (*((uint16 *) (A)))
+/*
+  Attention: please note that uint3korr reads 4 bytes (not 3)!
+  This means that you must provide enough allocated space.
+*/
+#if defined(HAVE_purify) && !defined(_WIN32)
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#else
+#define uint3korr(A) (uint32) (*((unsigned int *) (A)) & 0xFFFFFF)
+#endif
+#define uint4korr(A) (uint32) (*((uint32 *) (A)))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) (ulonglong) (*((ulonglong *) (A)))
+#define sint8korr(A) (longlong) (*((longlong *) (A)))
+
+#define int2store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((uint16*)(pT))= (uint16) (A);\
+ } while (0)
+
+#define int3store(T,A) do { *(T)= (uchar) ((A));\
+ *(T+1)=(uchar) (((uint) (A) >> 8));\
+ *(T+2)=(uchar) (((A) >> 16));\
+ } while (0)
+#define int4store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((uint32 *) (pT))= (uint32) (A); \
+ } while (0)
+
+#define int5store(T,A) do { *(T)= (uchar)((A));\
+ *((T)+1)=(uchar) (((A) >> 8));\
+ *((T)+2)=(uchar) (((A) >> 16));\
+ *((T)+3)=(uchar) (((A) >> 24));\
+ *((T)+4)=(uchar) (((A) >> 32));\
+ } while(0)
+#define int6store(T,A) do { *(T)= (uchar)((A)); \
+ *((T)+1)=(uchar) (((A) >> 8)); \
+ *((T)+2)=(uchar) (((A) >> 16)); \
+ *((T)+3)=(uchar) (((A) >> 24)); \
+ *((T)+4)=(uchar) (((A) >> 32)); \
+ *((T)+5)=(uchar) (((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((ulonglong *) (pT))= (ulonglong) (A);\
+ } while(0)
diff --git a/include/crypt_genhash_impl.h b/include/crypt_genhash_impl.h
new file mode 100644
index 00000000000..af5afd23e86
--- /dev/null
+++ b/include/crypt_genhash_impl.h
@@ -0,0 +1,32 @@
+/* defines and prototypes for using crypt_genhash_impl.cc */
+
+#ifndef CRYPT_HASHGEN_IMPL_H
+#define CRYPT_HASHGEN_IMPL_H
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+#define MIXCHARS 32
+#define CRYPT_SALT_LENGTH 20
+#define CRYPT_MAGIC_LENGTH 3
+#define CRYPT_PARAM_LENGTH 13
+#define SHA256_HASH_LENGTH 43
+#define CRYPT_MAX_PASSWORD_SIZE (CRYPT_SALT_LENGTH + \
+ SHA256_HASH_LENGTH + \
+ CRYPT_MAGIC_LENGTH + \
+ CRYPT_PARAM_LENGTH)
+
+int extract_user_salt(char **salt_begin,
+ char **salt_end);
+C_MODE_START
+char *
+my_crypt_genhash(char *ctbuffer,
+ size_t ctbufflen,
+ const char *plaintext,
+ int plaintext_len,
+ const char *switchsalt,
+ const char **params);
+void generate_user_salt(char *buffer, int buffer_len);
+void xor_string(char *to, int to_len, char *pattern, int pattern_len);
+
+C_MODE_END
+#endif
diff --git a/include/errmsg.h b/include/errmsg.h
index 64ec2df395c..b839060a881 100644
--- a/include/errmsg.h
+++ b/include/errmsg.h
@@ -16,8 +16,12 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-/* Error messages for MySQL clients */
-/* (Error messages for the daemon are in sql/share/errmsg.txt) */
+/*
+ Error messages numbers for MySQL clients.
+  The error messages themselves are in libmysql/errmsg.c
+
+ Error messages for the mysqld daemon are in sql/share/errmsg.txt
+*/
#ifdef __cplusplus
extern "C" {
@@ -102,7 +106,9 @@ extern const char *client_errors[]; /* Error messages */
#define CR_NEW_STMT_METADATA 2057
#define CR_ALREADY_CONNECTED 2058
#define CR_AUTH_PLUGIN_CANNOT_LOAD 2059
-#define CR_ERROR_LAST /*Copy last error nr:*/ 2059
+#define CR_DUPLICATE_CONNECTION_ATTR 2060
+#define CR_AUTH_PLUGIN_ERR 2061
+#define CR_ERROR_LAST /*Copy last error nr:*/ 2061
/* Add error numbers before CR_ERROR_LAST and change it accordingly. */
#endif /* ERRMSG_INCLUDED */
diff --git a/include/ft_global.h b/include/ft_global.h
index 8a1069d6e62..8a77cbca014 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -43,11 +43,32 @@ struct _ft_vft
void (*reinit_search)(FT_INFO *);
};
+typedef struct st_ft_info_ext FT_INFO_EXT;
+struct _ft_vft_ext
+{
+ uint (*get_version)(); // Extended API version
+ ulonglong (*get_flags)();
+ ulonglong (*get_docid)(FT_INFO_EXT *);
+ ulonglong (*count_matches)(FT_INFO_EXT *);
+};
+
+/* Flags for extended FT API */
+#define FTS_ORDERED_RESULT (LL(1) << 1)
+#define FTS_DOCID_IN_RESULT (LL(1) << 2)
+
+#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"
+
#ifndef FT_CORE
struct st_ft_info
{
struct _ft_vft *please; /* INTERCAL style :-) */
};
+
+struct st_ft_info_ext
+{
+ struct _ft_vft *please; /* INTERCAL style :-) */
+ struct _ft_vft_ext *could_you;
+};
#endif
extern const char *ft_stopword_file;
diff --git a/include/little_endian.h b/include/little_endian.h
new file mode 100644
index 00000000000..7223fea648f
--- /dev/null
+++ b/include/little_endian.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Data in little-endian format.
+*/
+
+#ifndef MY_BYTE_ORDER_ARCH_OPTIMIZED
+#define float4get(V,M) memcpy(&V, (M), sizeof(float))
+#define float4store(V,M) memcpy(V, (&M), sizeof(float))
+#define float8get(V,M) doubleget((V),(M))
+#define float8store(V,M) doublestore((V),(M))
+
+/* Bi-endian hardware.... */
+#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
+#define doublestore(T,V) do { *(((char*)T)+0)=(char) ((uchar *) &V)[4];\
+ *(((char*)T)+1)=(char) ((uchar *) &V)[5];\
+ *(((char*)T)+2)=(char) ((uchar *) &V)[6];\
+ *(((char*)T)+3)=(char) ((uchar *) &V)[7];\
+ *(((char*)T)+4)=(char) ((uchar *) &V)[0];\
+ *(((char*)T)+5)=(char) ((uchar *) &V)[1];\
+ *(((char*)T)+6)=(char) ((uchar *) &V)[2];\
+ *(((char*)T)+7)=(char) ((uchar *) &V)[3]; }\
+ while(0)
+#define doubleget(V,M) do { double def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[4];\
+ ((uchar*) &def_temp)[1]=(M)[5];\
+ ((uchar*) &def_temp)[2]=(M)[6];\
+ ((uchar*) &def_temp)[3]=(M)[7];\
+ ((uchar*) &def_temp)[4]=(M)[0];\
+ ((uchar*) &def_temp)[5]=(M)[1];\
+ ((uchar*) &def_temp)[6]=(M)[2];\
+ ((uchar*) &def_temp)[7]=(M)[3];\
+ (V) = def_temp; } while(0)
+#else /* Bi-endian hardware.... */
+
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define doublestore(T,V) memcpy((T), (void*) &V, sizeof(double))
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+
+#endif /* Bi-endian hardware.... */
+
+#endif /* !MY_BYTE_ORDER_ARCH_OPTIMIZED */
+
+#define ushortget(V,M) do { uchar *pM= (uchar*)(M);V = uint2korr(pM);} while(0)
+#define shortget(V,M) do { uchar *pM= (uchar*)(M);V = sint2korr(pM);} while(0)
+#define longget(V,M) do { uchar *pM= (uchar*)(M);V = sint4korr(pM);} while(0)
+#define ulongget(V,M) do { uchar *pM= (uchar*)(M);V = uint4korr(pM);} while(0)
+#define shortstore(T,V) int2store(T,V)
+#define longstore(T,V) int4store(T,V)
+
+#ifndef floatstore
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
+#define floatget(V,M) memcpy(&V, (M), sizeof(float))
+#endif
+#ifndef doubleget
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
+#endif /* doubleget */
+
+#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
+#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 969cb0058ac..5d2a6f80b75 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -588,6 +588,10 @@ my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap);
+uint32 my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors);
+
#define _MY_U 01 /* Upper case */
#define _MY_L 02 /* Lower case */
#define _MY_NMR 04 /* Numeral (digit) */
diff --git a/include/my_base.h b/include/my_base.h
index 4cbcb00425b..18b75f88393 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -46,7 +46,8 @@
#define HA_OPEN_COPY 256 /* Open copy (for repair) */
/* Internal temp table, used for temporary results */
#define HA_OPEN_INTERNAL_TABLE 512
-#define HA_OPEN_MERGE_TABLE 1024
+#define HA_OPEN_NO_PSI_CALL 1024 /* Don't call/connect PSI */
+#define HA_OPEN_MERGE_TABLE 2048
/* The following is parameter to ha_rkey() how to use key */
@@ -194,6 +195,11 @@ enum ha_extra_function {
HA_EXTRA_ATTACH_CHILDREN,
HA_EXTRA_IS_ATTACHED_CHILDREN,
HA_EXTRA_DETACH_CHILDREN,
+ /*
+ Prepare table for export
+ (e.g. quiesce the table and write table metadata).
+ */
+ HA_EXTRA_EXPORT,
HA_EXTRA_DETACH_CHILD,
/* Inform handler we will force a close as part of flush */
HA_EXTRA_PREPARE_FOR_FORCED_CLOSE
@@ -317,6 +323,23 @@ enum ha_base_keytype {
#define HA_OPTION_RELIES_ON_SQL_LAYER 512
#define HA_OPTION_NULL_FIELDS 1024
#define HA_OPTION_PAGE_CHECKSUM 2048
+/*
+ STATS_PERSISTENT=1 has been specified in the SQL command (either CREATE
+ or ALTER TABLE). Table and index statistics that are collected by the
+ storage engine and used by the optimizer for query optimization will be
+ stored on disk and will not change after a server restart.
+*/
+#define HA_OPTION_STATS_PERSISTENT 4096
+/*
+ STATS_PERSISTENT=0 has been specified in CREATE/ALTER TABLE. Statistics
+ for the table will be wiped away on server shutdown and new ones recalculated
+ after the server is started again. If none of HA_OPTION_STATS_PERSISTENT or
+ HA_OPTION_NO_STATS_PERSISTENT is set, this means that the setting is not
+ explicitly set at table level and the corresponding table will use whatever
+ is the global server default.
+*/
+#define HA_OPTION_NO_STATS_PERSISTENT 8192
+
/* .frm has extra create options in linked-list format */
#define HA_OPTION_TEXT_CREATE_OPTIONS (1L << 14)
#define HA_OPTION_TEMP_COMPRESS_RECORD (1L << 15) /* set by isamchk */
@@ -334,7 +357,7 @@ enum ha_base_keytype {
#define HA_CREATE_PAGE_CHECKSUM 32
#define HA_CREATE_DELAY_KEY_WRITE 64
#define HA_CREATE_RELIES_ON_SQL_LAYER 128
-
+#define HA_CREATE_INTERNAL_TABLE 256
/* Flags used by start_bulk_insert */
@@ -458,7 +481,8 @@ enum ha_base_keytype {
/* It is not possible to log this statement */
#define HA_ERR_LOGGING_IMPOSSIBLE 170
/* The event was corrupt, leading to illegal data being read */
-#define HA_ERR_CORRUPT_EVENT 171
+#define HA_ERR_CORRUPT_EVENT 171 /* The event was corrupt, leading to
+ illegal data being read */
#define HA_ERR_NEW_FILE 172 /* New file format */
/* The event could not be processed no other handler error happened */
#define HA_ERR_ROWS_EVENT_APPLY 173
@@ -466,16 +490,19 @@ enum ha_base_keytype {
#define HA_ERR_FILE_TOO_SHORT 175 /* File too short */
#define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */
#define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */
+/* There's no explicitly listed partition in table for the given value */
#define HA_ERR_NOT_IN_LOCK_PARTITIONS 178
#define HA_ERR_INDEX_COL_TOO_LONG 179 /* Index column length exceeds limit */
#define HA_ERR_INDEX_CORRUPT 180 /* Index corrupted */
#define HA_ERR_UNDO_REC_TOO_BIG 181 /* Undo log record too big */
-#define HA_ERR_TABLE_IN_FK_CHECK 182 /* Table being used in foreign key check */
-#define HA_FTS_INVALID_DOCID 183 /* Invalid InnoDB Doc ID */
-#define HA_ERR_ROW_NOT_VISIBLE 184
-#define HA_ERR_ABORTED_BY_USER 185
-#define HA_ERR_DISK_FULL 186
-#define HA_ERR_LAST 186 /* Copy of last error nr */
+#define HA_FTS_INVALID_DOCID 182 /* Invalid InnoDB Doc ID */
+#define HA_ERR_TABLE_IN_FK_CHECK 183 /* Table being used in foreign key check */
+#define HA_ERR_TABLESPACE_EXISTS 184 /* The tablespace existed in storage engine */
+#define HA_ERR_TOO_MANY_FIELDS 185 /* Table has too many columns */
+#define HA_ERR_ROW_NOT_VISIBLE 186
+#define HA_ERR_ABORTED_BY_USER 187
+#define HA_ERR_DISK_FULL 188
+#define HA_ERR_LAST 188 /* Copy of last error nr */
/* Number of different errors */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
@@ -608,4 +635,17 @@ C_MODE_START
typedef void (* invalidator_by_filename)(const char * filename);
C_MODE_END
+
+enum durability_properties
+{
+ /*
+ Preserves the durability properties defined by the engine */
+ HA_REGULAR_DURABILITY= 0,
+ /*
+ Ignore the durability properties defined by the engine and
+ write only in-memory entries.
+ */
+ HA_IGNORE_DURABILITY= 1
+};
+
#endif /* _my_base_h */
diff --git a/include/my_byteorder.h b/include/my_byteorder.h
new file mode 100644
index 00000000000..1f29248bfb2
--- /dev/null
+++ b/include/my_byteorder.h
@@ -0,0 +1,54 @@
+#ifndef MY_BYTEORDER_INCLUDED
+#define MY_BYTEORDER_INCLUDED
+
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+/*
+ Macro for reading 32-bit integer from network byte order (big-endian)
+ from an unaligned memory location.
+*/
+#define int4net(A) (int32) (((uint32) ((uchar) (A)[3])) | \
+ (((uint32) ((uchar) (A)[2])) << 8) | \
+ (((uint32) ((uchar) (A)[1])) << 16) | \
+ (((uint32) ((uchar) (A)[0])) << 24))
+
+/*
+  Function-like macros for reading and storing in machine-independent
+  format (low byte first). There are 'korr' (from 'corrector') variants
+  for integer types, and 'get' (from 'getter') variants for float types.
+*/
+#if defined(__i386__) || defined(_WIN32)
+#define MY_BYTE_ORDER_ARCH_OPTIMIZED
+#include "byte_order_generic_x86.h"
+#elif defined(__x86_64__)
+#include "byte_order_generic_x86_64.h"
+#else
+#include "byte_order_generic.h"
+#endif
+
+/*
+  Function-like macros for reading and storing short/long values in
+  machine format, from/to some place in memory. V should be a variable
+  (not in a register) and M a pointer to a byte.
+*/
+#ifdef WORDS_BIGENDIAN
+#include "big_endian.h"
+#else
+#include "little_endian.h"
+#endif
+
+#endif /* MY_BYTEORDER_INCLUDED */
diff --git a/include/my_default.h b/include/my_default.h
new file mode 100644
index 00000000000..1d556de69ee
--- /dev/null
+++ b/include/my_default.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Definitions for mysys/my_default.c */
+
+#ifndef MY_DEFAULT_INCLUDED
+#define MY_DEFAULT_INCLUDED
+
+C_MODE_START
+
+extern const char *my_defaults_extra_file;
+extern const char *my_defaults_group_suffix;
+extern const char *my_defaults_file;
+extern my_bool my_getopt_use_args_separator;
+extern my_bool my_getopt_is_args_separator(const char* arg);
+
+/* Define the type of function to be passed to process_default_option_files */
+typedef int (*Process_option_func)(void *ctx, const char *group_name,
+ const char *option);
+
+extern int get_defaults_options(int argc, char **argv,
+ char **defaults, char **extra_defaults,
+ char **group_suffix);
+extern int my_load_defaults(const char *conf_file, const char **groups,
+ int *argc, char ***argv, const char ***);
+extern int load_defaults(const char *conf_file, const char **groups,
+ int *argc, char ***argv);
+extern int my_search_option_files(const char *conf_file, int *argc,
+ char ***argv, uint *args_used,
+ Process_option_func func, void *func_ctx,
+ const char **default_directories);
+extern void free_defaults(char **argv);
+extern void my_print_default_files(const char *conf_file);
+extern void print_defaults(const char *conf_file, const char **groups);
+
+C_MODE_END
+
+#endif /* MY_DEFAULT_INCLUDED */
diff --git a/include/my_getopt.h b/include/my_getopt.h
index 589d9c9880c..2cbbca9cab9 100644
--- a/include/my_getopt.h
+++ b/include/my_getopt.h
@@ -17,7 +17,9 @@
#ifndef _my_getopt_h
#define _my_getopt_h
-#include "my_sys.h" /* loglevel */
+#include "my_sys.h" /* loglevel */
+/* my_getopt and my_default are almost always used together */
+#include <my_default.h>
C_MODE_START
@@ -85,7 +87,6 @@ struct my_option
void *app_type; /**< To be used by an application */
};
-
typedef my_bool (*my_get_one_option)(int, const struct my_option *, char *);
/**
diff --git a/include/my_global.h b/include/my_global.h
index 7c133268f59..db71a60238f 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -49,11 +49,6 @@
#define _POSIX_THREAD_CPUTIME
#endif /* __CYGWIN__ */
-/* to make command line shorter we'll define USE_PRAGMA_INTERFACE here */
-#ifdef USE_PRAGMA_IMPLEMENTATION
-#define USE_PRAGMA_INTERFACE
-#endif
-
#if defined(__OpenBSD__) && (OpenBSD >= 200411)
#define HAVE_ERRNO_AS_DEFINE
#endif
@@ -117,6 +112,7 @@
/* Define missing access() modes. */
#define F_OK 0
#define W_OK 2
+#define R_OK 4 /* Test for read permission. */
/* Define missing file locking constants. */
#define F_RDLCK 1
@@ -335,6 +331,9 @@ C_MODE_END
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
+#ifdef HAVE_SYS_TIMEB_H
+#include <sys/timeb.h> /* Avoid warnings on SCO */
+#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -1047,296 +1046,7 @@ typedef char my_bool; /* Small bool */
#define MY_HOW_OFTEN_TO_ALARM 2 /* How often we want info on screen */
#define MY_HOW_OFTEN_TO_WRITE 10000 /* How often we want info on screen */
-/*
- Define-funktions for reading and storing in machine independent format
- (low byte first)
-*/
-
-/* Optimized store functions for Intel x86 */
-#if defined(__i386__) || defined(_WIN32)
-#define sint2korr(A) (*((const int16 *) (A)))
-#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
- (((uint32) 255L << 24) | \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])) : \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])))
-#define sint4korr(A) (*((const long *) (A)))
-#define uint2korr(A) (*((const uint16 *) (A)))
-#if defined(HAVE_valgrind) && !defined(_WIN32)
-#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16))
-#else
-/*
- ATTENTION !
-
- Please, note, uint3korr reads 4 bytes (not 3) !
- It means, that you have to provide enough allocated space !
-*/
-#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF)
-#endif /* HAVE_valgrind && !_WIN32 */
-#define uint4korr(A) (*((const uint32 *) (A)))
-#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) ((uchar) (A)[4])) << 32))
-#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
- (((uint32) ((uchar) (A)[1])) << 8) + \
- (((uint32) ((uchar) (A)[2])) << 16) + \
- (((uint32) ((uchar) (A)[3])) << 24)) + \
- (((ulonglong) ((uchar) (A)[4])) << 32) + \
- (((ulonglong) ((uchar) (A)[5])) << 40))
-#define uint8korr(A) (*((const ulonglong *) (A)))
-#define sint8korr(A) (*((const longlong *) (A)))
-#define int2store(T,A) *((uint16*) (T))= (uint16) (A)
-#define int3store(T,A) do { *(T)= (uchar) ((A));\
- *(T+1)=(uchar) (((uint) (A) >> 8));\
- *(T+2)=(uchar) (((A) >> 16)); } while (0)
-#define int4store(T,A) *((long *) (T))= (long) (A)
-#define int5store(T,A) do { *(T)= (uchar)((A));\
- *((T)+1)=(uchar) (((A) >> 8));\
- *((T)+2)=(uchar) (((A) >> 16));\
- *((T)+3)=(uchar) (((A) >> 24)); \
- *((T)+4)=(uchar) (((A) >> 32)); } while(0)
-#define int6store(T,A) do { *(T)= (uchar)((A)); \
- *((T)+1)=(uchar) (((A) >> 8)); \
- *((T)+2)=(uchar) (((A) >> 16)); \
- *((T)+3)=(uchar) (((A) >> 24)); \
- *((T)+4)=(uchar) (((A) >> 32)); \
- *((T)+5)=(uchar) (((A) >> 40)); } while(0)
-#define int8store(T,A) *((ulonglong *) (T))= (ulonglong) (A)
-
-typedef union {
- double v;
- long m[2];
-} doubleget_union;
-#define doubleget(V,M) \
-do { doubleget_union _tmp; \
- _tmp.m[0] = *((const long*)(M)); \
- _tmp.m[1] = *(((const long*) (M))+1); \
- (V) = _tmp.v; } while(0)
-#define doublestore(T,V) do { *((long *) T) = ((const doubleget_union *)&V)->m[0]; \
- *(((long *) T)+1) = ((const doubleget_union *)&V)->m[1]; \
- } while (0)
-#define float4get(V,M) do { *((float *) &(V)) = *((const float*) (M)); } while(0)
-#define float8get(V,M) doubleget((V),(M))
-#define float4store(V,M) memcpy((uchar*) V,(uchar*) (&M),sizeof(float))
-#define floatstore(T,V) memcpy((uchar*)(T), (uchar*)(&V),sizeof(float))
-#define floatget(V,M) memcpy((uchar*) &V,(uchar*) (M),sizeof(float))
-#define float8store(V,M) doublestore((V),(M))
-#else
-
-/*
- We're here if it's not a IA-32 architecture (Win32 and UNIX IA-32 defines
- were done before)
-*/
-#define sint2korr(A) (int16) (((int16) ((uchar) (A)[0])) +\
- ((int16) ((int16) (A)[1]) << 8))
-#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
- (((uint32) 255L << 24) | \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])) : \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])))
-#define sint4korr(A) (int32) (((int32) ((uchar) (A)[0])) +\
- (((int32) ((uchar) (A)[1]) << 8)) +\
- (((int32) ((uchar) (A)[2]) << 16)) +\
- (((int32) ((int16) (A)[3]) << 24)))
-#define sint8korr(A) (longlong) uint8korr(A)
-#define uint2korr(A) (uint16) (((uint16) ((uchar) (A)[0])) +\
- ((uint16) ((uchar) (A)[1]) << 8))
-#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16))
-#define uint4korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24))
-#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) ((uchar) (A)[4])) << 32))
-#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
- (((uint32) ((uchar) (A)[1])) << 8) + \
- (((uint32) ((uchar) (A)[2])) << 16) + \
- (((uint32) ((uchar) (A)[3])) << 24)) + \
- (((ulonglong) ((uchar) (A)[4])) << 32) + \
- (((ulonglong) ((uchar) (A)[5])) << 40))
-#define uint8korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) (((uint32) ((uchar) (A)[4])) +\
- (((uint32) ((uchar) (A)[5])) << 8) +\
- (((uint32) ((uchar) (A)[6])) << 16) +\
- (((uint32) ((uchar) (A)[7])) << 24))) <<\
- 32))
-#define int2store(T,A) do { uint def_temp= (uint) (A) ;\
- *((uchar*) (T))= (uchar)(def_temp); \
- *((uchar*) (T)+1)=(uchar)((def_temp >> 8)); \
- } while(0)
-#define int3store(T,A) do { /*lint -save -e734 */\
- *((uchar*)(T))=(uchar) ((A));\
- *((uchar*) (T)+1)=(uchar) (((A) >> 8));\
- *((uchar*)(T)+2)=(uchar) (((A) >> 16)); \
- /*lint -restore */} while(0)
-#define int4store(T,A) do { *((char *)(T))=(char) ((A));\
- *(((char *)(T))+1)=(char) (((A) >> 8));\
- *(((char *)(T))+2)=(char) (((A) >> 16));\
- *(((char *)(T))+3)=(char) (((A) >> 24)); } while(0)
-#define int5store(T,A) do { *((char *)(T))= (char)((A)); \
- *(((char *)(T))+1)= (char)(((A) >> 8)); \
- *(((char *)(T))+2)= (char)(((A) >> 16)); \
- *(((char *)(T))+3)= (char)(((A) >> 24)); \
- *(((char *)(T))+4)= (char)(((A) >> 32)); \
- } while(0)
-#define int6store(T,A) do { *((char *)(T))= (char)((A)); \
- *(((char *)(T))+1)= (char)(((A) >> 8)); \
- *(((char *)(T))+2)= (char)(((A) >> 16)); \
- *(((char *)(T))+3)= (char)(((A) >> 24)); \
- *(((char *)(T))+4)= (char)(((A) >> 32)); \
- *(((char *)(T))+5)= (char)(((A) >> 40)); \
- } while(0)
-#define int8store(T,A) do { uint def_temp= (uint) (A), def_temp2= (uint) ((A) >> 32); \
- int4store((T),def_temp); \
- int4store((T+4),def_temp2); } while(0)
-#ifdef WORDS_BIGENDIAN
-#define float4store(T,A) do { *(T)= ((uchar *) &A)[3];\
- *((T)+1)=(char) ((uchar *) &A)[2];\
- *((T)+2)=(char) ((uchar *) &A)[1];\
- *((T)+3)=(char) ((uchar *) &A)[0]; } while(0)
-
-#define float4get(V,M) do { float def_temp;\
- ((uchar*) &def_temp)[0]=(M)[3];\
- ((uchar*) &def_temp)[1]=(M)[2];\
- ((uchar*) &def_temp)[2]=(M)[1];\
- ((uchar*) &def_temp)[3]=(M)[0];\
- (V)=def_temp; } while(0)
-#define float8store(T,V) do { *(T)= ((uchar *) &V)[7];\
- *((T)+1)=(char) ((uchar *) &V)[6];\
- *((T)+2)=(char) ((uchar *) &V)[5];\
- *((T)+3)=(char) ((uchar *) &V)[4];\
- *((T)+4)=(char) ((uchar *) &V)[3];\
- *((T)+5)=(char) ((uchar *) &V)[2];\
- *((T)+6)=(char) ((uchar *) &V)[1];\
- *((T)+7)=(char) ((uchar *) &V)[0]; } while(0)
-
-#define float8get(V,M) do { double def_temp;\
- ((uchar*) &def_temp)[0]=(M)[7];\
- ((uchar*) &def_temp)[1]=(M)[6];\
- ((uchar*) &def_temp)[2]=(M)[5];\
- ((uchar*) &def_temp)[3]=(M)[4];\
- ((uchar*) &def_temp)[4]=(M)[3];\
- ((uchar*) &def_temp)[5]=(M)[2];\
- ((uchar*) &def_temp)[6]=(M)[1];\
- ((uchar*) &def_temp)[7]=(M)[0];\
- (V) = def_temp; } while(0)
-#else
-#define float4get(V,M) memcpy(&V, (M), sizeof(float))
-#define float4store(V,M) memcpy(V, (&M), sizeof(float))
-
-#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
-#define doublestore(T,V) do { *(((char*)T)+0)=(char) ((uchar *) &V)[4];\
- *(((char*)T)+1)=(char) ((uchar *) &V)[5];\
- *(((char*)T)+2)=(char) ((uchar *) &V)[6];\
- *(((char*)T)+3)=(char) ((uchar *) &V)[7];\
- *(((char*)T)+4)=(char) ((uchar *) &V)[0];\
- *(((char*)T)+5)=(char) ((uchar *) &V)[1];\
- *(((char*)T)+6)=(char) ((uchar *) &V)[2];\
- *(((char*)T)+7)=(char) ((uchar *) &V)[3]; }\
- while(0)
-#define doubleget(V,M) do { double def_temp;\
- ((uchar*) &def_temp)[0]=(M)[4];\
- ((uchar*) &def_temp)[1]=(M)[5];\
- ((uchar*) &def_temp)[2]=(M)[6];\
- ((uchar*) &def_temp)[3]=(M)[7];\
- ((uchar*) &def_temp)[4]=(M)[0];\
- ((uchar*) &def_temp)[5]=(M)[1];\
- ((uchar*) &def_temp)[6]=(M)[2];\
- ((uchar*) &def_temp)[7]=(M)[3];\
- (V) = def_temp; } while(0)
-#endif /* __FLOAT_WORD_ORDER */
-
-#define float8get(V,M) doubleget((V),(M))
-#define float8store(V,M) doublestore((V),(M))
-#endif /* WORDS_BIGENDIAN */
-
-#endif /* __i386__ OR _WIN32 */
-
-/*
- Macro for reading 32-bit integer from network byte order (big-endian)
- from unaligned memory location.
-*/
-#define int4net(A) (int32) (((uint32) ((uchar) (A)[3])) |\
- (((uint32) ((uchar) (A)[2])) << 8) |\
- (((uint32) ((uchar) (A)[1])) << 16) |\
- (((uint32) ((uchar) (A)[0])) << 24))
-/*
- Define-funktions for reading and storing in machine format from/to
- short/long to/from some place in memory V should be a (not
- register) variable, M is a pointer to byte
-*/
-
-#ifdef WORDS_BIGENDIAN
-
-#define ushortget(V,M) do { V = (uint16) (((uint16) ((uchar) (M)[1]))+\
- ((uint16) ((uint16) (M)[0]) << 8)); } while(0)
-#define shortget(V,M) do { V = (short) (((short) ((uchar) (M)[1]))+\
- ((short) ((short) (M)[0]) << 8)); } while(0)
-#define longget(V,M) do { int32 def_temp;\
- ((uchar*) &def_temp)[0]=(M)[0];\
- ((uchar*) &def_temp)[1]=(M)[1];\
- ((uchar*) &def_temp)[2]=(M)[2];\
- ((uchar*) &def_temp)[3]=(M)[3];\
- (V)=def_temp; } while(0)
-#define ulongget(V,M) do { uint32 def_temp;\
- ((uchar*) &def_temp)[0]=(M)[0];\
- ((uchar*) &def_temp)[1]=(M)[1];\
- ((uchar*) &def_temp)[2]=(M)[2];\
- ((uchar*) &def_temp)[3]=(M)[3];\
- (V)=def_temp; } while(0)
-#define shortstore(T,A) do { uint def_temp=(uint) (A) ;\
- *(((char*)T)+1)=(char)(def_temp); \
- *(((char*)T)+0)=(char)(def_temp >> 8); } while(0)
-#define longstore(T,A) do { *(((char*)T)+3)=((A));\
- *(((char*)T)+2)=(((A) >> 8));\
- *(((char*)T)+1)=(((A) >> 16));\
- *(((char*)T)+0)=(((A) >> 24)); } while(0)
-
-#define floatget(V,M) memcpy(&V, (M), sizeof(float))
-#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
-#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
-#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
-#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
-#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
-
-#else
-
-#define ushortget(V,M) do { V = uint2korr(M); } while(0)
-#define shortget(V,M) do { V = sint2korr(M); } while(0)
-#define longget(V,M) do { V = sint4korr(M); } while(0)
-#define ulongget(V,M) do { V = uint4korr(M); } while(0)
-#define shortstore(T,V) int2store(T,V)
-#define longstore(T,V) int4store(T,V)
-#ifndef floatstore
-#define floatstore(T,V) memcpy((T), (void *) (&V), sizeof(float))
-#define floatget(V,M) memcpy(&V, (M), sizeof(float))
-#endif
-#ifndef doubleget
-#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
-#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
-#endif /* doubleget */
-#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
-#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
-
-#endif /* WORDS_BIGENDIAN */
+#include <my_byteorder.h>
#ifdef HAVE_CHARSET_utf8
#define MYSQL_UNIVERSAL_CLIENT_CHARSET "utf8"
@@ -1397,10 +1107,6 @@ static inline char *dlerror(void)
#endif
/* Define some useful general macros (should be done after all headers). */
-#if !defined(max)
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
#define MY_MAX(a, b) ((a) > (b) ? (a) : (b))
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
diff --git a/include/my_handler_errors.h b/include/my_handler_errors.h
index f2c51773e83..24b977c38ce 100644
--- a/include/my_handler_errors.h
+++ b/include/my_handler_errors.h
@@ -84,8 +84,10 @@ static const char *handler_error_messages[]=
"Index column length exceeds limit",
"Index corrupted",
"Undo record too big",
- "Table is being used in foreign key check",
"Invalid InnoDB FTS Doc ID",
+ "Table is being used in foreign key check",
+ "Tablespace already exists",
+ "Too many columns",
"Row is not visible by the current transaction",
"Operation was interrupted by end user (probably kill command?)",
"Disk full"
diff --git a/include/my_md5.h b/include/my_md5.h
index 4f90541067b..5a0c60e7bfa 100644
--- a/include/my_md5.h
+++ b/include/my_md5.h
@@ -1,7 +1,8 @@
#ifndef MY_MD5_INCLUDED
#define MY_MD5_INCLUDED
-/* Copyright (C) 2000 MySQL AB
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ Copyright (c) 2013 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,79 +17,36 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* See md5.c for explanation and copyright information. */
+#include "m_string.h"
-/*
- * $FreeBSD: src/contrib/cvs/lib/md5.h,v 1.2 1999/12/11 15:10:02 peter Exp $
- */
+#define MD5_HASH_SIZE 16 /* Hash size in bytes */
-#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
/*
- Use MD5 implementation provided by the SSL libraries.
+ Wrapper function for MD5 implementation.
*/
-
-#if defined(HAVE_YASSL)
-
-C_MODE_START
-
-void my_md5_hash(char *digest, const char *buf, int len);
-
-C_MODE_END
-
-#else /* HAVE_YASSL */
-
-#include <openssl/md5.h>
-
-#define MY_MD5_HASH(digest, buf, len) \
-do { \
- MD5_CTX ctx; \
- MD5_Init (&ctx); \
- MD5_Update (&ctx, buf, len); \
- MD5_Final (digest, &ctx); \
-} while (0)
-
-#endif /* HAVE_YASSL */
-
-#else /* HAVE_YASSL || HAVE_OPENSSL */
-/* Fallback to the MySQL's implementation. */
-
-/* Unlike previous versions of this code, uint32 need not be exactly
- 32 bits, merely 32 bits or more. Choosing a data type which is 32
- bits instead of 64 is not important; speed is considerably more
- important. ANSI guarantees that "unsigned long" will be big enough,
- and always using it seems to have few disadvantages. */
-typedef uint32 cvs_uint32;
-
-typedef struct {
- cvs_uint32 buf[4];
- cvs_uint32 bits[2];
- unsigned char in[64];
-} my_MD5Context;
-
-C_MODE_START
-
-void my_MD5Init (my_MD5Context *context);
-void my_MD5Update (my_MD5Context *context,
- unsigned char const *buf, unsigned len);
-void my_MD5Final (unsigned char digest[16],
- my_MD5Context *context);
-
-C_MODE_END
-
-#define MY_MD5_HASH(digest,buf,len) \
-do { \
- my_MD5Context ctx; \
- my_MD5Init (&ctx); \
- my_MD5Update (&ctx, buf, len); \
- my_MD5Final (digest, &ctx); \
-} while (0)
-
-#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
-
-C_MODE_START
+#ifdef __cplusplus
+extern "C" {
+#endif
void compute_md5_hash(char *digest, const char *buf, int len);
-C_MODE_END
+/*
+ Convert an array of bytes to a hexadecimal representation.
+
+ Used to generate a hexadecimal representation of a message digest.
+*/
+static inline void array_to_hex(char *to, const unsigned char *str, uint len)
+{
+ const unsigned char *str_end= str + len;
+ for (; str != str_end; ++str)
+ {
+ *to++= _dig_vec_lower[((uchar) *str) >> 4];
+ *to++= _dig_vec_lower[((uchar) *str) & 0x0F];
+ }
+}
+
+#ifdef __cplusplus
+}
+#endif
#endif /* MY_MD5_INCLUDED */
diff --git a/include/my_rnd.h b/include/my_rnd.h
new file mode 100644
index 00000000000..b4a5d735811
--- /dev/null
+++ b/include/my_rnd.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 or later of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _my_rnd_h
+#define _my_rnd_h
+
+C_MODE_START
+
+struct my_rnd_struct {
+ unsigned long seed1,seed2,max_value;
+ double max_value_dbl;
+};
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
+double my_rnd(struct my_rnd_struct *rand_st);
+double my_rnd_ssl(struct my_rnd_struct *rand_st);
+
+C_MODE_END
+
+#endif /* _my_rnd_h */
diff --git a/include/my_sys.h b/include/my_sys.h
index 42ee9c915da..1a9c5f887a8 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -173,8 +173,6 @@ extern void *my_memdup(const void *from,size_t length,myf MyFlags);
extern char *my_strdup(const char *from,myf MyFlags);
extern char *my_strndup(const char *from, size_t length, myf MyFlags);
-extern int sf_leaking_memory; /* set to 1 to disable memleak detection */
-
#ifdef HAVE_LARGE_PAGES
extern uint my_get_large_page_size(void);
extern uchar * my_large_malloc(size_t size, myf my_flags);
@@ -198,14 +196,18 @@ extern void my_large_free(uchar *ptr);
#endif /* GNUC */
#define my_alloca(SZ) alloca((size_t) (SZ))
#define my_afree(PTR) ((void)0)
+#define my_safe_alloca(size, max_alloca_sz) ((size <= max_alloca_sz) ? \
+ my_alloca(size) : \
+ my_malloc(size, MYF(0)))
+#define my_safe_afree(ptr, size, max_alloca_sz) if (size > max_alloca_sz) \
+ my_free(ptr)
#else
#define my_alloca(SZ) my_malloc(SZ,MYF(MY_FAE))
#define my_afree(PTR) my_free(PTR)
+#define my_safe_alloca(size, max_alloca_sz) my_alloca(size)
+#define my_safe_afree(ptr, size, max_alloca_sz) my_afree(ptr)
#endif /* HAVE_ALLOCA */
-#define my_safe_alloca(size, min_length) ((size <= min_length) ? my_alloca(size) : my_malloc(size,MYF(MY_FAE)))
-#define my_safe_afree(ptr, size, min_length) ((size <= min_length) ? my_afree(ptr) : my_free(ptr))
-
#ifndef errno /* did we already get it? */
#ifdef HAVE_ERRNO_AS_DEFINE
#include <errno.h> /* errno is a define */
@@ -222,6 +224,7 @@ extern void (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags);
extern uint my_file_limit;
extern ulonglong my_thread_stack_size;
+extern int sf_leaking_memory; /* set to 1 to disable memleak detection */
extern void (*proc_info_hook)(void *, const PSI_stage_info *, PSI_stage_info *,
const char *, const char *, const unsigned int);
@@ -264,11 +267,6 @@ extern my_bool my_disable_locking, my_disable_async_io,
extern my_bool my_disable_sync;
extern char wild_many,wild_one,wild_prefix;
extern const char *charsets_dir;
-/* from default.c */
-extern const char *my_defaults_extra_file;
-extern const char *my_defaults_group_suffix;
-extern const char *my_defaults_file;
-
extern my_bool timed_mutexes;
enum loglevel {
@@ -565,13 +563,8 @@ my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
typedef uint32 ha_checksum;
extern ulong my_crc_dbug_check;
-/* Define the type of function to be passed to process_default_option_files */
-typedef int (*Process_option_func)(void *ctx, const char *group_name,
- const char *option);
-
#include <my_alloc.h>
-
/* Prototypes for mysys and my_func functions */
extern int my_copy(const char *from,const char *to,myf MyFlags);
@@ -630,6 +623,13 @@ extern int my_access(const char *path, int amode);
extern int check_if_legal_filename(const char *path);
extern int check_if_legal_tablename(const char *path);
+#ifdef __WIN__
+extern my_bool is_filename_allowed(const char *name, size_t length,
+ my_bool allow_current_dir);
+#else /* __WIN__ */
+# define is_filename_allowed(name, length, allow_cwd) (TRUE)
+#endif /* __WIN__ */
+
#ifdef _WIN32
extern int nt_share_delete(const char *name,myf MyFlags);
#define my_delete_allow_opened(fname,flags) nt_share_delete((fname),(flags))
@@ -662,15 +662,16 @@ extern void thr_set_sync_wait_callback(void (*before_sync)(void),
extern int my_sync(File fd, myf my_flags);
extern int my_sync_dir(const char *dir_name, myf my_flags);
extern int my_sync_dir_by_file(const char *file_name, myf my_flags);
-extern void my_error(int nr,myf MyFlags, ...);
+extern const char *my_get_err_msg(uint nr);
+extern void my_error(uint nr,myf MyFlags, ...);
extern void my_printf_error(uint my_err, const char *format,
myf MyFlags, ...)
ATTRIBUTE_FORMAT(printf, 2, 4);
extern void my_printv_error(uint error, const char *format, myf MyFlags,
va_list ap);
extern int my_error_register(const char** (*get_errmsgs) (),
- int first, int last);
-extern const char **my_error_unregister(int first, int last);
+ uint first, uint last);
+extern const char **my_error_unregister(uint first, uint last);
extern void my_message(uint my_err, const char *str,myf MyFlags);
extern void my_message_stderr(uint my_err, const char *str, myf MyFlags);
extern my_bool my_init(void);
@@ -781,7 +782,8 @@ extern size_t my_b_gets(IO_CACHE *info, char *to, size_t max_length);
extern my_off_t my_b_filelength(IO_CACHE *info);
extern size_t my_b_write_backtick_quote(IO_CACHE *info, const char *str,
size_t len);
-extern size_t my_b_printf(IO_CACHE *info, const char* fmt, ...);
+extern size_t my_b_printf(IO_CACHE *info, const char* fmt, ...)
+ ATTRIBUTE_FORMAT(printf, 2, 3);
extern size_t my_b_vprintf(IO_CACHE *info, const char* fmt, va_list ap);
extern my_bool open_cached_file(IO_CACHE *cache,const char *dir,
const char *prefix, size_t cache_size,
@@ -860,22 +862,6 @@ static inline char *safe_strdup_root(MEM_ROOT *root, const char *str)
}
extern char *strmake_root(MEM_ROOT *root,const char *str,size_t len);
extern void *memdup_root(MEM_ROOT *root,const void *str, size_t len);
-extern int get_defaults_options(int argc, char **argv,
- char **defaults, char **extra_defaults,
- char **group_suffix);
-extern my_bool my_getopt_use_args_separator;
-extern my_bool my_getopt_is_args_separator(const char* arg);
-extern int my_load_defaults(const char *conf_file, const char **groups,
- int *argc, char ***argv, const char ***);
-extern int load_defaults(const char *conf_file, const char **groups,
- int *argc, char ***argv);
-extern int my_search_option_files(const char *conf_file, int *argc,
- char ***argv, uint *args_used,
- Process_option_func func, void *func_ctx,
- const char **default_directories);
-extern void free_defaults(char **argv);
-extern void my_print_default_files(const char *conf_file);
-extern void print_defaults(const char *conf_file, const char **groups);
extern my_bool my_compress(uchar *, size_t *, size_t *);
extern my_bool my_uncompress(uchar *, size_t , size_t *);
extern uchar *my_compress_alloc(const uchar *packet, size_t *len,
@@ -967,14 +953,6 @@ void my_uuid(uchar *guid);
void my_uuid2str(const uchar *guid, char *s);
void my_uuid_end();
-struct my_rnd_struct {
- unsigned long seed1,seed2,max_value;
- double max_value_dbl;
-};
-
-void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
-double my_rnd(struct my_rnd_struct *rand_st);
-
/* character sets */
extern uint get_charset_number(const char *cs_name, uint cs_flags);
extern uint get_collation_number(const char *name);
@@ -1037,6 +1015,5 @@ void my_init_mysys_psi_keys(void);
struct st_mysql_file;
extern struct st_mysql_file *mysql_stdin;
-
C_MODE_END
#endif /* _my_sys_h */
diff --git a/include/my_time.h b/include/my_time.h
index 9bd545bb850..c7a3e17d236 100644
--- a/include/my_time.h
+++ b/include/my_time.h
@@ -138,8 +138,8 @@ void my_init_time(void);
estimate.
RETURN VALUES
- FALSE The value seems sane
- TRUE The MYSQL_TIME value is definitely out of range
+ TRUE The value seems sane
+ FALSE The MYSQL_TIME value is definitely out of range
*/
static inline my_bool validate_timestamp_range(const MYSQL_TIME *t)
diff --git a/include/mysql/client_authentication.h b/include/mysql/client_authentication.h
new file mode 100644
index 00000000000..2bd2fc98bac
--- /dev/null
+++ b/include/mysql/client_authentication.h
@@ -0,0 +1,13 @@
+#ifndef CLIENT_AUTHENTICATION_H
+#define CLIENT_AUTHENTICATION_H
+#include "mysql.h"
+#include "mysql/client_plugin.h"
+
+C_MODE_START
+int sha256_password_auth_client(MYSQL_PLUGIN_VIO *vio, MYSQL *mysql);
+int sha256_password_init(char *, size_t, int, va_list);
+int sha256_password_deinit(void);
+C_MODE_END
+
+#endif
+
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h
index 38573180232..4220e73ee11 100644
--- a/include/mysql/plugin.h
+++ b/include/mysql/plugin.h
@@ -45,6 +45,8 @@ class Item;
#define MYSQL_THD void*
#endif
+typedef void * MYSQL_PLUGIN;
+
#include <mysql/services.h>
#define MYSQL_XIDDATASIZE 128
@@ -69,10 +71,10 @@ typedef struct st_mysql_xid MYSQL_XID;
*/
/* MySQL plugin interface version */
-#define MYSQL_PLUGIN_INTERFACE_VERSION 0x0103
+#define MYSQL_PLUGIN_INTERFACE_VERSION 0x0104
/* MariaDB plugin interface version */
-#define MARIA_PLUGIN_INTERFACE_VERSION 0x0104
+#define MARIA_PLUGIN_INTERFACE_VERSION 0x0105
/*
The allowable types of plugins
@@ -85,7 +87,8 @@ typedef struct st_mysql_xid MYSQL_XID;
#define MYSQL_AUDIT_PLUGIN 5 /* The Audit plugin type */
#define MYSQL_REPLICATION_PLUGIN 6 /* The replication plugin type */
#define MYSQL_AUTHENTICATION_PLUGIN 7 /* The authentication plugin type */
-#define MYSQL_MAX_PLUGIN_TYPE_NUM 8 /* The number of plugin types */
+#define MYSQL_VALIDATE_PASSWORD_PLUGIN 8 /* validate password plugin type */
+#define MYSQL_MAX_PLUGIN_TYPE_NUM 9 /* The number of plugin types */
/* We use the following strings to define licenses for plugins */
#define PLUGIN_LICENSE_PROPRIETARY 0
@@ -558,7 +561,7 @@ struct handlerton;
/*
API for Replication plugin. (MYSQL_REPLICATION_PLUGIN)
*/
- #define MYSQL_REPLICATION_INTERFACE_VERSION 0x0100
+ #define MYSQL_REPLICATION_INTERFACE_VERSION 0x0200
/**
Replication plugin descriptor
@@ -606,6 +609,7 @@ int thd_sql_command(const MYSQL_THD thd);
void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton);
void thd_storage_lock_wait(MYSQL_THD thd, long long value);
int thd_tx_isolation(const MYSQL_THD thd);
+int thd_tx_is_read_only(const MYSQL_THD thd);
char *thd_security_context(MYSQL_THD thd, char *buffer, unsigned int length,
unsigned int max_query_len);
/* Increments the row counter, see THD::row_count */
diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp
index fbd4ec2dd3f..c3ba7eec0d6 100644
--- a/include/mysql/plugin_audit.h.pp
+++ b/include/mysql/plugin_audit.h.pp
@@ -1,4 +1,5 @@
#include "plugin.h"
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -232,6 +233,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp
index 46811825142..4f04d51cb52 100644
--- a/include/mysql/plugin_auth.h.pp
+++ b/include/mysql/plugin_auth.h.pp
@@ -1,4 +1,5 @@
#include <mysql/plugin.h>
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -232,6 +233,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp
index 49cf7e5b931..3a978645c24 100644
--- a/include/mysql/plugin_ftparser.h.pp
+++ b/include/mysql/plugin_ftparser.h.pp
@@ -1,4 +1,5 @@
#include "plugin.h"
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -185,6 +186,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/psi/mysql_file.h b/include/mysql/psi/mysql_file.h
index 816ac713631..c226258f462 100644
--- a/include/mysql/psi/mysql_file.h
+++ b/include/mysql/psi/mysql_file.h
@@ -518,7 +518,7 @@ static inline void inline_mysql_file_register(
)
{
#ifdef HAVE_PSI_FILE_INTERFACE
- PSI_CALL(register_file)(category, info, count);
+ PSI_FILE_CALL(register_file)(category, info, count);
#endif
}
@@ -533,13 +533,13 @@ inline_mysql_file_fgets(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) size, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) size, src_file, src_line);
result= fgets(str, size, file->m_file);
- PSI_CALL(end_file_wait)(locker, result ? strlen(result) : 0);
+ PSI_FILE_CALL(end_file_wait)(locker, result ? strlen(result) : 0);
return result;
}
#endif
@@ -559,13 +559,13 @@ inline_mysql_file_fgetc(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
result= fgetc(file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 1);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 1);
return result;
}
#endif
@@ -586,14 +586,14 @@ inline_mysql_file_fputs(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
bytes= str ? strlen(str) : 0;
- PSI_CALL(start_file_wait)(locker, bytes, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, bytes, src_file, src_line);
result= fputs(str, file->m_file);
- PSI_CALL(end_file_wait)(locker, bytes);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes);
return result;
}
#endif
@@ -613,13 +613,13 @@ inline_mysql_file_fputc(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
result= fputc(c, file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 1);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 1);
return result;
}
#endif
@@ -639,15 +639,15 @@ inline_mysql_file_fprintf(MYSQL_FILE *file, const char *format, ...)
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, __FILE__, __LINE__);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, __FILE__, __LINE__);
va_start(args, format);
result= vfprintf(file->m_file, format, args);
va_end(args);
- PSI_CALL(end_file_wait)(locker, (size_t) result);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) result);
return result;
}
#endif
@@ -669,13 +669,13 @@ inline_mysql_file_vfprintf(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= vfprintf(file->m_file, format, args);
- PSI_CALL(end_file_wait)(locker, (size_t) result);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) result);
return result;
}
#endif
@@ -695,13 +695,13 @@ inline_mysql_file_fflush(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_FLUSH);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_FLUSH);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= fflush(file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -727,13 +727,13 @@ inline_mysql_file_fstat(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, filenr,
- PSI_FILE_FSTAT);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, filenr, PSI_FILE_FSTAT);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_fstat(filenr, stat_area, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -753,14 +753,13 @@ inline_mysql_file_stat(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state,
- key, PSI_FILE_STAT,
- path, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_STAT, path, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
result= my_stat(path, stat_area, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_open_wait)(locker, result);
return result;
}
#endif
@@ -780,14 +779,14 @@ inline_mysql_file_chsize(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_CHSIZE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_CHSIZE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) newlength, src_file,
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) newlength, src_file,
src_line);
result= my_chsize(file, newlength, filler, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) newlength);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) newlength);
return result;
}
#endif
@@ -810,14 +809,14 @@ inline_mysql_file_fopen(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
(&state, key, PSI_FILE_STREAM_OPEN, filename, that);
if (likely(locker != NULL))
{
- that->m_psi= PSI_CALL(start_file_open_wait)(locker, src_file,
- src_line);
+ PSI_FILE_CALL(start_file_open_wait)
+ (locker, src_file, src_line);
that->m_file= my_fopen(filename, flags, myFlags);
- PSI_CALL(end_file_open_wait)(locker);
+ that->m_psi= PSI_FILE_CALL(end_file_open_wait)(locker, that->m_file);
if (unlikely(that->m_file == NULL))
{
my_free(that);
@@ -851,13 +850,13 @@ inline_mysql_file_fclose(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_STREAM_CLOSE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_STREAM_CLOSE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_fclose(file->m_file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
my_free(file);
return result;
}
@@ -881,17 +880,17 @@ inline_mysql_file_fread(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_fread(file->m_file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -912,17 +911,17 @@ inline_mysql_file_fwrite(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_fwrite(file->m_file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -942,13 +941,13 @@ inline_mysql_file_fseek(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_SEEK);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_SEEK);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_fseek(file->m_file, pos, whence, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -968,13 +967,13 @@ inline_mysql_file_ftell(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_TELL);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_TELL);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_ftell(file->m_file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -994,13 +993,13 @@ inline_mysql_file_create(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_CREATE,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_CREATE, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_create(filename, create_flags, access_flags, myFlags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1024,7 +1023,7 @@ inline_mysql_file_create_temp(
*/
file= create_temp_file(to, dir, pfx, mode, myFlags);
#ifdef HAVE_PSI_FILE_INTERFACE
- PSI_CALL(create_file)(key, to, file);
+ PSI_FILE_CALL(create_file)(key, to, file);
#endif
return file;
}
@@ -1040,13 +1039,13 @@ inline_mysql_file_open(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_OPEN,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_OPEN, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_open(filename, flags, myFlags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1066,13 +1065,13 @@ inline_mysql_file_close(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_CLOSE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_CLOSE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_close(file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1093,17 +1092,17 @@ inline_mysql_file_read(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_read(file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -1124,17 +1123,17 @@ inline_mysql_file_write(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_write(file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -1155,16 +1154,17 @@ inline_mysql_file_pread(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_pread(file, buffer, count, offset, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -1185,17 +1185,17 @@ inline_mysql_file_pwrite(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_pwrite(file, buffer, count, offset, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -1215,12 +1215,13 @@ inline_mysql_file_seek(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_SEEK);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_SEEK);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_seek(file, pos, whence, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1240,12 +1241,13 @@ inline_mysql_file_tell(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_TELL);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_TELL);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_tell(file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1265,13 +1267,13 @@ inline_mysql_file_delete(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_DELETE,
- name, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_DELETE, name, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_delete(name, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1291,13 +1293,13 @@ inline_mysql_file_rename(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_RENAME,
- to, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_RENAME, to, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_rename(from, to, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1318,14 +1320,14 @@ inline_mysql_file_create_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_CREATE,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_CREATE, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_create_with_symlink(linkname, filename, create_flags, access_flags,
flags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1346,13 +1348,13 @@ inline_mysql_file_delete_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_DELETE,
- name, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_DELETE, name, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_delete_with_symlink(name, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1372,13 +1374,13 @@ inline_mysql_file_rename_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_RENAME,
- to, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_RENAME, to, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_rename_with_symlink(from, to, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1398,12 +1400,13 @@ inline_mysql_file_sync(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, fd, PSI_FILE_SYNC);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, fd, PSI_FILE_SYNC);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_sync(fd, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
diff --git a/include/mysql/psi/mysql_idle.h b/include/mysql/psi/mysql_idle.h
index 7a3fccfdb8c..c53d0ceb8c7 100644
--- a/include/mysql/psi/mysql_idle.h
+++ b/include/mysql/psi/mysql_idle.h
@@ -70,7 +70,7 @@ inline_mysql_start_idle_wait(PSI_idle_locker_state *state,
const char *src_file, int src_line)
{
struct PSI_idle_locker *locker;
- locker= PSI_CALL(start_idle_wait)(state, src_file, src_line);
+ locker= PSI_IDLE_CALL(start_idle_wait)(state, src_file, src_line);
return locker;
}
@@ -82,7 +82,7 @@ static inline void
inline_mysql_end_idle_wait(struct PSI_idle_locker *locker)
{
if (likely(locker != NULL))
- PSI_CALL(end_idle_wait)(locker);
+ PSI_IDLE_CALL(end_idle_wait)(locker);
}
#endif
diff --git a/include/mysql/psi/mysql_socket.h b/include/mysql/psi/mysql_socket.h
index c908032883a..e1d56539f85 100644
--- a/include/mysql/psi/mysql_socket.h
+++ b/include/mysql/psi/mysql_socket.h
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
#ifdef __WIN__
#include <ws2def.h>
#include <winsock2.h>
+ #include <MSWSock.h>
#define SOCKBUF_T char
#else
#include <netinet/in.h>
@@ -121,7 +122,7 @@ mysql_socket_set_address(
{
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_info)(socket.m_psi, NULL, addr, addr_len);
+ PSI_SOCKET_CALL(set_socket_info)(socket.m_psi, NULL, addr, addr_len);
#endif
}
@@ -141,7 +142,7 @@ MYSQL_SOCKET socket __attribute__ ((unused))
{
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_thread_owner)(socket.m_psi);
+ PSI_SOCKET_CALL(set_socket_thread_owner)(socket.m_psi);
#endif
}
@@ -247,8 +248,8 @@ inline_mysql_start_socket_wait(PSI_socket_locker_state *state,
struct PSI_socket_locker *locker;
if (mysql_socket.m_psi != NULL)
{
- locker= PSI_CALL(start_socket_wait)(state, mysql_socket.m_psi, op,
- byte_count, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (state, mysql_socket.m_psi, op, byte_count, src_file, src_line);
}
else
locker= NULL;
@@ -263,7 +264,7 @@ static inline void
inline_mysql_end_socket_wait(struct PSI_socket_locker *locker, size_t byte_count)
{
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, byte_count);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, byte_count);
}
/**
@@ -276,7 +277,7 @@ static inline void
inline_mysql_socket_set_state(MYSQL_SOCKET socket, enum PSI_socket_state state)
{
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_state)(socket.m_psi, state);
+ PSI_SOCKET_CALL(set_socket_state)(socket.m_psi, state);
}
#endif /* HAVE_PSI_SOCKET_INTERFACE */
@@ -537,7 +538,7 @@ static inline void inline_mysql_socket_register(
PSI_socket_info *info,
int count)
{
- PSI_CALL(register_socket)(category, info, count);
+ PSI_SOCKET_CALL(register_socket)(category, info, count);
}
#endif
@@ -551,16 +552,15 @@ inline_mysql_socket_socket
#endif
int domain, int type, int protocol)
{
- MYSQL_SOCKET mysql_socket;
+ MYSQL_SOCKET mysql_socket= MYSQL_INVALID_SOCKET;
mysql_socket.fd= socket(domain, type, protocol);
#ifdef HAVE_PSI_SOCKET_INTERFACE
- mysql_socket.m_psi= PSI_CALL(init_socket)(key, (const my_socket*)&mysql_socket.fd);
-
- if (likely(mysql_socket.fd != INVALID_SOCKET && mysql_socket.m_psi != NULL))
- PSI_CALL(set_socket_info)(mysql_socket.m_psi, &mysql_socket.fd, NULL, 0);
-#else
- mysql_socket.m_psi= NULL;
+ if (likely(mysql_socket.fd != INVALID_SOCKET))
+ {
+ mysql_socket.m_psi= PSI_SOCKET_CALL(init_socket)
+ (key, (const my_socket*)&mysql_socket.fd, NULL, 0);
+ }
#endif
return mysql_socket;
}
@@ -583,17 +583,18 @@ inline_mysql_socket_bind
/* Instrumentation start */
PSI_socket_locker_state state;
PSI_socket_locker *locker;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= bind(mysql_socket.fd, addr, len);
/* Instrumentation end */
- PSI_CALL(set_socket_info)(mysql_socket.m_psi, NULL, addr, len);
+ if (result == 0)
+ PSI_SOCKET_CALL(set_socket_info)(mysql_socket.m_psi, NULL, addr, len);
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -622,15 +623,15 @@ inline_mysql_socket_getsockname
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getsockname(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -660,15 +661,15 @@ inline_mysql_socket_connect
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= connect(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -698,15 +699,15 @@ inline_mysql_socket_getpeername
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getpeername(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -736,18 +737,18 @@ inline_mysql_socket_send
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SEND, n, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SEND, n, src_file, src_line);
/* Instrumented code */
- result= send(mysql_socket.fd, buf, n, flags);
+ result= send(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_written;
bytes_written= (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_written);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_written);
}
return result;
@@ -755,7 +756,7 @@ inline_mysql_socket_send
#endif
/* Non instrumented code */
- result= send(mysql_socket.fd, buf, n, flags);
+ result= send(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
return result;
}
@@ -778,18 +779,18 @@ inline_mysql_socket_recv
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= recv(mysql_socket.fd, buf, n, flags);
+ result= recv(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_read;
bytes_read= (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_read);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_read);
}
return result;
@@ -797,7 +798,7 @@ inline_mysql_socket_recv
#endif
/* Non instrumented code */
- result= recv(mysql_socket.fd, buf, n, flags);
+ result= recv(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
return result;
}
@@ -820,18 +821,18 @@ inline_mysql_socket_sendto
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SEND, n, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SEND, n, src_file, src_line);
/* Instrumented code */
- result= sendto(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= sendto(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_written;
bytes_written = (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_written);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_written);
}
return result;
@@ -839,7 +840,7 @@ inline_mysql_socket_sendto
#endif
/* Non instrumented code */
- result= sendto(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= sendto(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
return result;
}
@@ -863,18 +864,18 @@ inline_mysql_socket_recvfrom
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= recvfrom(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= recvfrom(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_read;
bytes_read = (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_read);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_read);
}
return result;
@@ -882,7 +883,7 @@ inline_mysql_socket_recvfrom
#endif
/* Non instrumented code */
- result= recvfrom(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= recvfrom(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
return result;
}
@@ -905,15 +906,15 @@ inline_mysql_socket_getsockopt
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getsockopt(mysql_socket.fd, level, optname, optval, optlen);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -944,15 +945,15 @@ inline_mysql_socket_setsockopt
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= setsockopt(mysql_socket.fd, level, optname, optval, optlen);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -982,15 +983,15 @@ inline_mysql_socket_listen
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= listen(mysql_socket.fd, backlog);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -1021,15 +1022,15 @@ inline_mysql_socket_accept
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, socket_listen.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, socket_listen.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
socket_accept.fd= accept(socket_listen.fd, addr, &addr_length);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
}
else
#endif
@@ -1039,14 +1040,12 @@ inline_mysql_socket_accept
}
#ifdef HAVE_PSI_SOCKET_INTERFACE
- /* Initialize the instrument with the new socket descriptor and address */
- socket_accept.m_psi=
- PSI_CALL(init_socket)(key, (const my_socket*)&socket_accept.fd);
-
- /* FIXME: simplify this with just 1 call to init_socket(). */
- if (socket_accept.m_psi != NULL)
- PSI_CALL(set_socket_info)(socket_accept.m_psi, &socket_accept.fd, addr,
- addr_length);
+ if (likely(socket_accept.fd != INVALID_SOCKET))
+ {
+ /* Initialize the instrument with the new socket descriptor and address */
+ socket_accept.m_psi= PSI_SOCKET_CALL(init_socket)
+ (key, (const my_socket*)&socket_accept.fd, addr, addr_length);
+ }
#endif
return socket_accept;
@@ -1070,18 +1069,18 @@ inline_mysql_socket_close
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CLOSE, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CLOSE, (size_t)0, src_file, src_line);
/* Instrumented code */
result= closesocket(mysql_socket.fd);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
/* Remove the instrumentation for this socket. */
if (mysql_socket.m_psi != NULL)
- PSI_CALL(destroy_socket)(mysql_socket.m_psi);
+ PSI_SOCKET_CALL(destroy_socket)(mysql_socket.m_psi);
return result;
}
@@ -1105,28 +1104,53 @@ inline_mysql_socket_shutdown
{
int result;
- /* Instrumentation start */
+#ifdef __WIN__
+ static LPFN_DISCONNECTEX DisconnectEx = NULL;
+ if (DisconnectEx == NULL)
+ {
+ DWORD dwBytesReturned;
+ GUID guidDisconnectEx = WSAID_DISCONNECTEX;
+ WSAIoctl(mysql_socket.fd, SIO_GET_EXTENSION_FUNCTION_POINTER,
+ &guidDisconnectEx, sizeof(GUID),
+ &DisconnectEx, sizeof(DisconnectEx),
+ &dwBytesReturned, NULL, NULL);
+ }
+#endif
+
+/* Instrumentation start */
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (mysql_socket.m_psi != NULL)
{
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SHUTDOWN, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SHUTDOWN, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= shutdown(mysql_socket.fd, how);
+#ifdef __WIN__
+ if (DisconnectEx)
+ result= (DisconnectEx(mysql_socket.fd, (LPOVERLAPPED) NULL,
+ (DWORD) 0, (DWORD) 0) == TRUE) ? 0 : -1;
+ else
+#endif
+ result= shutdown(mysql_socket.fd, how);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
#endif
/* Non instrumented code */
- result= shutdown(mysql_socket.fd, how);
+#ifdef __WIN__
+ if (DisconnectEx)
+ result= (DisconnectEx(mysql_socket.fd, (LPOVERLAPPED) NULL,
+ (DWORD) 0, (DWORD) 0) == TRUE) ? 0 : -1;
+ else
+#endif
+ result= shutdown(mysql_socket.fd, how);
return result;
}
diff --git a/include/mysql/psi/mysql_stage.h b/include/mysql/psi/mysql_stage.h
index dc44e9b0bed..61bfdbb7d59 100644
--- a/include/mysql/psi/mysql_stage.h
+++ b/include/mysql/psi/mysql_stage.h
@@ -53,7 +53,7 @@
static inline void inline_mysql_stage_register(
const char *category, PSI_stage_info **info, int count)
{
- PSI_CALL(register_stage)(category, info, count);
+ PSI_STAGE_CALL(register_stage)(category, info, count);
}
#endif
@@ -62,7 +62,7 @@ static inline void
inline_mysql_set_stage(PSI_stage_key key,
const char *src_file, int src_line)
{
- PSI_CALL(start_stage)(key, src_file, src_line);
+ PSI_STAGE_CALL(start_stage)(key, src_file, src_line);
}
#endif
diff --git a/include/mysql/psi/mysql_statement.h b/include/mysql/psi/mysql_statement.h
index 1b065065e57..d7a76ee25e4 100644
--- a/include/mysql/psi/mysql_statement.h
+++ b/include/mysql/psi/mysql_statement.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -63,10 +63,10 @@
#endif
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN) \
- inline_mysql_start_statement(STATE, K, DB, DB_LEN, __FILE__, __LINE__)
+ #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN, CS) \
+ inline_mysql_start_statement(STATE, K, DB, DB_LEN, CS, __FILE__, __LINE__)
#else
- #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN) \
+ #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN, CS) \
NULL
#endif
@@ -122,7 +122,7 @@
static inline void inline_mysql_statement_register(
const char *category, PSI_statement_info *info, int count)
{
- PSI_CALL(register_statement)(category, info, count);
+ PSI_STATEMENT_CALL(register_statement)(category, info, count);
}
#ifdef HAVE_PSI_STATEMENT_DIGEST_INTERFACE
@@ -132,7 +132,7 @@ inline_mysql_digest_start(PSI_statement_locker *locker)
PSI_digest_locker* digest_locker= NULL;
if (likely(locker != NULL))
- digest_locker= PSI_CALL(digest_start)(locker);
+ digest_locker= PSI_STATEMENT_CALL(digest_start)(locker);
return digest_locker;
}
#endif
@@ -143,7 +143,7 @@ inline_mysql_add_token(PSI_digest_locker *locker, uint token,
void *yylval)
{
if (likely(locker != NULL))
- locker= PSI_CALL(digest_add_token)(locker, token,
+ locker= PSI_STATEMENT_CALL(digest_add_token)(locker, token,
(OPAQUE_LEX_YYSTYPE*)yylval);
return locker;
}
@@ -153,12 +153,13 @@ static inline struct PSI_statement_locker *
inline_mysql_start_statement(PSI_statement_locker_state *state,
PSI_statement_key key,
const char *db, uint db_len,
+ const CHARSET_INFO *charset,
const char *src_file, int src_line)
{
PSI_statement_locker *locker;
- locker= PSI_CALL(get_thread_statement_locker)(state, key);
+ locker= PSI_STATEMENT_CALL(get_thread_statement_locker)(state, key, charset);
if (likely(locker != NULL))
- PSI_CALL(start_statement)(locker, db, db_len, src_file, src_line);
+ PSI_STATEMENT_CALL(start_statement)(locker, db, db_len, src_file, src_line);
return locker;
}
@@ -168,7 +169,7 @@ inline_mysql_refine_statement(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- locker= PSI_CALL(refine_statement)(locker, key);
+ locker= PSI_STATEMENT_CALL(refine_statement)(locker, key);
}
return locker;
}
@@ -179,7 +180,7 @@ inline_mysql_set_statement_text(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_text)(locker, text, text_len);
+ PSI_STATEMENT_CALL(set_statement_text)(locker, text, text_len);
}
}
@@ -189,7 +190,7 @@ inline_mysql_set_statement_lock_time(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_lock_time)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_lock_time)(locker, count);
}
}
@@ -199,7 +200,7 @@ inline_mysql_set_statement_rows_sent(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_rows_sent)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_rows_sent)(locker, count);
}
}
@@ -209,7 +210,7 @@ inline_mysql_set_statement_rows_examined(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_rows_examined)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_rows_examined)(locker, count);
}
}
@@ -217,9 +218,9 @@ static inline void
inline_mysql_end_statement(struct PSI_statement_locker *locker,
Diagnostics_area *stmt_da)
{
- PSI_CALL(end_stage)();
+ PSI_STAGE_CALL(end_stage)();
if (likely(locker != NULL))
- PSI_CALL(end_statement)(locker, stmt_da);
+ PSI_STATEMENT_CALL(end_statement)(locker, stmt_da);
}
#endif
diff --git a/include/mysql/psi/mysql_table.h b/include/mysql/psi/mysql_table.h
index 1796943096e..815313e654b 100644
--- a/include/mysql/psi/mysql_table.h
+++ b/include/mysql/psi/mysql_table.h
@@ -60,22 +60,22 @@
@sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
- #define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
- { \
- if (PSI != NULL) \
- { \
- PSI_table_locker *locker; \
- PSI_table_locker_state state; \
- locker= PSI_CALL(start_table_io_wait)(& state, PSI, OP, INDEX, \
- __FILE__, __LINE__); \
- PAYLOAD \
- if (locker != NULL) \
- PSI_CALL(end_table_io_wait)(locker); \
- } \
- else \
- { \
- PAYLOAD \
- } \
+ #define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
+ { \
+ if (PSI != NULL) \
+ { \
+ PSI_table_locker *locker; \
+ PSI_table_locker_state state; \
+ locker= PSI_TABLE_CALL(start_table_io_wait) \
+ (& state, PSI, OP, INDEX, __FILE__, __LINE__); \
+ PAYLOAD \
+ if (locker != NULL) \
+ PSI_TABLE_CALL(end_table_io_wait)(locker); \
+ } \
+ else \
+ { \
+ PAYLOAD \
+ } \
}
#else
#define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
@@ -93,22 +93,22 @@
@sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
- #define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
- { \
- if (PSI != NULL) \
- { \
- PSI_table_locker *locker; \
- PSI_table_locker_state state; \
- locker= PSI_CALL(start_table_lock_wait)(& state, PSI, OP, FLAGS, \
- __FILE__, __LINE__); \
- PAYLOAD \
- if (locker != NULL) \
- PSI_CALL(end_table_lock_wait)(locker); \
- } \
- else \
- { \
- PAYLOAD \
- } \
+ #define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
+ { \
+ if (PSI != NULL) \
+ { \
+ PSI_table_locker *locker; \
+ PSI_table_locker_state state; \
+ locker= PSI_TABLE_CALL(start_table_lock_wait) \
+ (& state, PSI, OP, FLAGS, __FILE__, __LINE__); \
+ PAYLOAD \
+ if (locker != NULL) \
+ PSI_TABLE_CALL(end_table_lock_wait)(locker); \
+ } \
+ else \
+ { \
+ PAYLOAD \
+ } \
}
#else
#define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
@@ -164,7 +164,8 @@ inline_mysql_start_table_lock_wait(PSI_table_locker_state *state,
if (psi != NULL)
{
struct PSI_table_locker *locker;
- locker= PSI_CALL(start_table_lock_wait)(state, psi, op, flags, src_file, src_line);
+ locker= PSI_TABLE_CALL(start_table_lock_wait)
+ (state, psi, op, flags, src_file, src_line);
return locker;
}
return NULL;
@@ -178,7 +179,7 @@ static inline void
inline_mysql_end_table_lock_wait(struct PSI_table_locker *locker)
{
if (locker != NULL)
- PSI_CALL(end_table_lock_wait)(locker);
+ PSI_TABLE_CALL(end_table_lock_wait)(locker);
}
#endif
diff --git a/include/mysql/psi/mysql_thread.h b/include/mysql/psi/mysql_thread.h
index 78175196fa2..f0d88ff8ede 100644
--- a/include/mysql/psi/mysql_thread.h
+++ b/include/mysql/psi/mysql_thread.h
@@ -597,7 +597,7 @@ static inline void inline_mysql_mutex_register(
)
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- PSI_CALL(register_mutex)(category, info, count);
+ PSI_MUTEX_CALL(register_mutex)(category, info, count);
#endif
}
@@ -613,7 +613,7 @@ static inline int inline_mysql_mutex_init(
)
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- that->m_psi= PSI_CALL(init_mutex)(key, &that->m_mutex);
+ that->m_psi= PSI_MUTEX_CALL(init_mutex)(key, &that->m_mutex);
#else
that->m_psi= NULL;
#endif
@@ -636,7 +636,7 @@ static inline int inline_mysql_mutex_destroy(
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_mutex)(that->m_psi);
+ PSI_MUTEX_CALL(destroy_mutex)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -664,7 +664,7 @@ static inline int inline_mysql_mutex_lock(
/* Instrumentation start */
PSI_mutex_locker *locker;
PSI_mutex_locker_state state;
- locker= PSI_CALL(start_mutex_wait)(&state, that->m_psi,
+ locker= PSI_MUTEX_CALL(start_mutex_wait)(&state, that->m_psi,
PSI_MUTEX_LOCK, src_file, src_line);
/* Instrumented code */
@@ -678,7 +678,7 @@ static inline int inline_mysql_mutex_lock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, result);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, result);
return result;
}
@@ -711,7 +711,7 @@ static inline int inline_mysql_mutex_trylock(
/* Instrumentation start */
PSI_mutex_locker *locker;
PSI_mutex_locker_state state;
- locker= PSI_CALL(start_mutex_wait)(&state, that->m_psi,
+ locker= PSI_MUTEX_CALL(start_mutex_wait)(&state, that->m_psi,
PSI_MUTEX_TRYLOCK, src_file, src_line);
/* Instrumented code */
@@ -725,7 +725,7 @@ static inline int inline_mysql_mutex_trylock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, result);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, result);
return result;
}
@@ -754,7 +754,7 @@ static inline int inline_mysql_mutex_unlock(
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_mutex)(that->m_psi);
+ PSI_MUTEX_CALL(unlock_mutex)(that->m_psi);
#endif
#ifdef SAFE_MUTEX
@@ -781,7 +781,7 @@ static inline void inline_mysql_rwlock_register(
)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- PSI_CALL(register_rwlock)(category, info, count);
+ PSI_RWLOCK_CALL(register_rwlock)(category, info, count);
#endif
}
@@ -792,7 +792,7 @@ static inline int inline_mysql_rwlock_init(
mysql_rwlock_t *that)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- that->m_psi= PSI_CALL(init_rwlock)(key, &that->m_rwlock);
+ that->m_psi= PSI_RWLOCK_CALL(init_rwlock)(key, &that->m_rwlock);
#else
that->m_psi= NULL;
#endif
@@ -810,7 +810,7 @@ static inline int inline_mysql_prlock_init(
mysql_prlock_t *that)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- that->m_psi= PSI_CALL(init_rwlock)(key, &that->m_prlock);
+ that->m_psi= PSI_RWLOCK_CALL(init_rwlock)(key, &that->m_prlock);
#else
that->m_psi= NULL;
#endif
@@ -824,7 +824,7 @@ static inline int inline_mysql_rwlock_destroy(
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -838,7 +838,7 @@ static inline int inline_mysql_prlock_destroy(
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -861,7 +861,7 @@ static inline int inline_mysql_rwlock_rdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_READLOCK, src_file, src_line);
/* Instrumented code */
@@ -869,7 +869,7 @@ static inline int inline_mysql_rwlock_rdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -897,7 +897,7 @@ static inline int inline_mysql_prlock_rdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_READLOCK, src_file, src_line);
/* Instrumented code */
@@ -905,7 +905,7 @@ static inline int inline_mysql_prlock_rdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -933,7 +933,7 @@ static inline int inline_mysql_rwlock_wrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_WRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -941,7 +941,7 @@ static inline int inline_mysql_rwlock_wrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -969,7 +969,7 @@ static inline int inline_mysql_prlock_wrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_WRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -977,7 +977,7 @@ static inline int inline_mysql_prlock_wrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -1005,7 +1005,7 @@ static inline int inline_mysql_rwlock_tryrdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_TRYREADLOCK, src_file, src_line);
/* Instrumented code */
@@ -1013,7 +1013,7 @@ static inline int inline_mysql_rwlock_tryrdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -1040,7 +1040,7 @@ static inline int inline_mysql_rwlock_trywrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_TRYWRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -1048,7 +1048,7 @@ static inline int inline_mysql_rwlock_trywrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -1066,7 +1066,7 @@ static inline int inline_mysql_rwlock_unlock(
int result;
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(that->m_psi);
#endif
result= rw_unlock(&that->m_rwlock);
return result;
@@ -1079,7 +1079,7 @@ static inline int inline_mysql_prlock_unlock(
int result;
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(that->m_psi);
#endif
result= rw_pr_unlock(&that->m_prlock);
return result;
@@ -1099,7 +1099,7 @@ static inline void inline_mysql_cond_register(
)
{
#ifdef HAVE_PSI_COND_INTERFACE
- PSI_CALL(register_cond)(category, info, count);
+ PSI_COND_CALL(register_cond)(category, info, count);
#endif
}
@@ -1111,7 +1111,7 @@ static inline int inline_mysql_cond_init(
const pthread_condattr_t *attr)
{
#ifdef HAVE_PSI_COND_INTERFACE
- that->m_psi= PSI_CALL(init_cond)(key, &that->m_cond);
+ that->m_psi= PSI_COND_CALL(init_cond)(key, &that->m_cond);
#else
that->m_psi= NULL;
#endif
@@ -1124,7 +1124,7 @@ static inline int inline_mysql_cond_destroy(
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_cond)(that->m_psi);
+ PSI_COND_CALL(destroy_cond)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -1147,7 +1147,7 @@ static inline int inline_mysql_cond_wait(
/* Instrumentation start */
PSI_cond_locker *locker;
PSI_cond_locker_state state;
- locker= PSI_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
+ locker= PSI_COND_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
PSI_COND_WAIT, src_file, src_line);
/* Instrumented code */
@@ -1155,7 +1155,7 @@ static inline int inline_mysql_cond_wait(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_cond_wait)(locker, result);
+ PSI_COND_CALL(end_cond_wait)(locker, result);
return result;
}
@@ -1184,7 +1184,7 @@ static inline int inline_mysql_cond_timedwait(
/* Instrumentation start */
PSI_cond_locker *locker;
PSI_cond_locker_state state;
- locker= PSI_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
+ locker= PSI_COND_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
PSI_COND_TIMEDWAIT, src_file, src_line);
/* Instrumented code */
@@ -1192,7 +1192,7 @@ static inline int inline_mysql_cond_timedwait(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_cond_wait)(locker, result);
+ PSI_COND_CALL(end_cond_wait)(locker, result);
return result;
}
@@ -1210,7 +1210,7 @@ static inline int inline_mysql_cond_signal(
int result;
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(signal_cond)(that->m_psi);
+ PSI_COND_CALL(signal_cond)(that->m_psi);
#endif
result= pthread_cond_signal(&that->m_cond);
return result;
@@ -1222,7 +1222,7 @@ static inline int inline_mysql_cond_broadcast(
int result;
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(broadcast_cond)(that->m_psi);
+ PSI_COND_CALL(broadcast_cond)(that->m_psi);
#endif
result= pthread_cond_broadcast(&that->m_cond);
return result;
@@ -1241,7 +1241,7 @@ static inline void inline_mysql_thread_register(
)
{
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(register_thread)(category, info, count);
+ PSI_THREAD_CALL(register_thread)(category, info, count);
#endif
}
@@ -1252,14 +1252,14 @@ static inline int inline_mysql_thread_create(
void *(*start_routine)(void*), void *arg)
{
int result;
- result= PSI_CALL(spawn_thread)(key, thread, attr, start_routine, arg);
+ result= PSI_THREAD_CALL(spawn_thread)(key, thread, attr, start_routine, arg);
return result;
}
static inline void inline_mysql_thread_set_psi_id(ulong id)
{
- struct PSI_thread *psi= PSI_CALL(get_thread)();
- PSI_CALL(set_thread_id)(psi, id);
+ struct PSI_thread *psi= PSI_THREAD_CALL(get_thread)();
+ PSI_THREAD_CALL(set_thread_id)(psi, id);
}
#endif
diff --git a/include/mysql/psi/psi.h b/include/mysql/psi/psi.h
index 8d5e6db7307..cc2057c630d 100644
--- a/include/mysql/psi/psi.h
+++ b/include/mysql/psi/psi.h
@@ -899,6 +899,10 @@ struct PSI_file_locker_state_v1
enum PSI_file_operation m_operation;
/** Current file. */
struct PSI_file *m_file;
+ /** Current file name. */
+ const char *m_name;
+ /** Current file class. */
+ void *m_class;
/** Current thread. */
struct PSI_thread *m_thread;
/** Operation number of bytes. */
@@ -958,6 +962,8 @@ struct PSI_digest_storage
{
my_bool m_full;
int m_byte_count;
+ /** Character set number. */
+ uint m_charset_number;
unsigned char m_token_array[PSI_MAX_DIGEST_STORAGE_SIZE];
};
typedef struct PSI_digest_storage PSI_digest_storage;
@@ -969,6 +975,9 @@ struct PSI_digest_locker_state
};
typedef struct PSI_digest_locker_state PSI_digest_locker_state;
+/* Duplicate of NAME_LEN, to avoid dependency on mysql_com.h */
+#define PSI_SCHEMA_NAME_LEN (64 * 3)
+
/**
State data storage for @c get_thread_statement_locker_v1_t,
@c get_thread_statement_locker_v1_t.
@@ -1029,6 +1038,10 @@ struct PSI_statement_locker_state_v1
ulong m_sort_scan;
/** Statement digest. */
PSI_digest_locker_state m_digest_state;
+ /** Current schema name. */
+ char m_schema_name[PSI_SCHEMA_NAME_LEN];
+ /** Length in bytes of @c m_schema_name. */
+ uint m_schema_name_length;
};
/**
@@ -1187,10 +1200,13 @@ typedef void (*destroy_cond_v1_t)(struct PSI_cond *cond);
Socket instrumentation initialisation API.
@param key the registered mutex key
@param socket descriptor
+ @param addr the socket ip address
+ @param addr_len length of socket ip address
@return an instrumented socket
*/
typedef struct PSI_socket* (*init_socket_v1_t)
- (PSI_socket_key key, const my_socket *fd);
+ (PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len);
/**
socket instrumentation destruction API.
@@ -1290,7 +1306,7 @@ typedef int (*spawn_thread_v1_t)(PSI_thread_key key,
@return an instrumented thread
*/
typedef struct PSI_thread* (*new_thread_v1_t)
- (PSI_thread_key key, const void *identity, ulong thread_id);
+ (PSI_thread_key key, const void *identity, ulonglong thread_id);
/**
Assign an id to an instrumented thread.
@@ -1298,7 +1314,7 @@ typedef struct PSI_thread* (*new_thread_v1_t)
@param id the id to assign
*/
typedef void (*set_thread_id_v1_t)(struct PSI_thread *thread,
- unsigned long id);
+ ulonglong id);
/**
Get the instrumentation for the running thread.
@@ -1570,16 +1586,18 @@ typedef void (*end_table_lock_wait_v1_t)(struct PSI_table_locker *locker);
@param op the operation to perform
@param src_file the source file name
@param src_line the source line number
- @return an instrumented file handle
*/
-typedef struct PSI_file* (*start_file_open_wait_v1_t)
+typedef void (*start_file_open_wait_v1_t)
(struct PSI_file_locker *locker, const char *src_file, uint src_line);
/**
End a file instrumentation open operation, for file streams.
@param locker the file locker.
+ @param result the opened file (NULL indicates failure, non NULL success).
+ @return an instrumented file handle
*/
-typedef void (*end_file_open_wait_v1_t)(struct PSI_file_locker *locker);
+typedef struct PSI_file* (*end_file_open_wait_v1_t)
+ (struct PSI_file_locker *locker, void *result);
/**
End a file instrumentation open operation, for non stream files.
@@ -1617,6 +1635,25 @@ typedef void (*end_file_wait_v1_t)
(struct PSI_file_locker *locker, size_t count);
/**
+ Start a file instrumentation close operation.
+ @param locker the file locker
+ @param op the operation to perform
+ @param src_file the source file name
+ @param src_line the source line number
+*/
+typedef void (*start_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, const char *src_file, uint src_line);
+
+/**
+ End a file instrumentation close operation.
+ @param locker the file locker.
+ @param rc the close operation return code (0 for success).
+ @note returns nothing (void); unlike the open operation, no instrumented file handle is produced
+*/
+typedef void (*end_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, int rc);
+
+/**
Start a new stage, and implicitly end the previous stage.
@param key the key of the new stage
@param src_file the source file name
@@ -1632,11 +1669,12 @@ typedef void (*end_stage_v1_t) (void);
Get a statement instrumentation locker.
@param state data storage for the locker
@param key the statement instrumentation key
+ @param charset client character set
@return a statement locker, or NULL
*/
typedef struct PSI_statement_locker* (*get_thread_statement_locker_v1_t)
(struct PSI_statement_locker_state_v1 *state,
- PSI_statement_key key);
+ PSI_statement_key key, const void *charset);
/**
Refine a statement locker to a more specific key.
@@ -1871,6 +1909,19 @@ typedef struct PSI_digest_locker* (*digest_add_token_v1_t)
(struct PSI_digest_locker *locker, uint token, struct OPAQUE_LEX_YYSTYPE *yylval);
/**
+ Stores an array of connection attributes
+ @param buffer char array of length encoded connection attributes
+ in network format
+ @param length length of the data in buffer
+ @param from_cs charset in which @buffer is encoded
+ @return state
+ @retval non-0 attributes truncated
+ @retval 0 stored the attribute
+*/
+typedef int (*set_thread_connect_attrs_v1_t)(const char *buffer, uint length,
+ const void *from_cs);
+
+/**
Performance Schema Interface, version 1.
@since PSI_VERSION_1
*/
@@ -2005,6 +2056,10 @@ struct PSI_v1
start_file_wait_v1_t start_file_wait;
/** @sa end_file_wait_v1_t. */
end_file_wait_v1_t end_file_wait;
+ /** @sa start_file_close_wait_v1_t. */
+ start_file_close_wait_v1_t start_file_close_wait;
+ /** @sa end_file_close_wait_v1_t. */
+ end_file_close_wait_v1_t end_file_close_wait;
/** @sa start_stage_v1_t. */
start_stage_v1_t start_stage;
/** @sa end_stage_v1_t. */
@@ -2065,6 +2120,8 @@ struct PSI_v1
digest_start_v1_t digest_start;
/** @sa digest_add_token_v1_t. */
digest_add_token_v1_t digest_add_token;
+ /** @sa set_thread_connect_attrs_v1_t. */
+ set_thread_connect_attrs_v1_t set_thread_connect_attrs;
};
/** @} (end of group Group_PSI_v1) */
@@ -2318,7 +2375,54 @@ typedef struct PSI_stage_info_none PSI_stage_info;
extern MYSQL_PLUGIN_IMPORT PSI *PSI_server;
-#define PSI_CALL(M) PSI_server->M
+/*
+ Allow to override PSI_XXX_CALL at compile time
+ with more efficient implementations, if available.
+ If nothing better is available,
+ make a dynamic call using the PSI_server function pointer.
+*/
+
+#ifndef PSI_MUTEX_CALL
+#define PSI_MUTEX_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_RWLOCK_CALL
+#define PSI_RWLOCK_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_COND_CALL
+#define PSI_COND_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_THREAD_CALL
+#define PSI_THREAD_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_FILE_CALL
+#define PSI_FILE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_SOCKET_CALL
+#define PSI_SOCKET_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_STAGE_CALL
+#define PSI_STAGE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_STATEMENT_CALL
+#define PSI_STATEMENT_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_TABLE_CALL
+#define PSI_TABLE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_IDLE_CALL
+#define PSI_IDLE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#define PSI_DYNAMIC_CALL(M) PSI_server->M
/** @} */
diff --git a/include/mysql/psi/psi_abi_v1.h.pp b/include/mysql/psi/psi_abi_v1.h.pp
index b0559213998..f2037c5b724 100644
--- a/include/mysql/psi/psi_abi_v1.h.pp
+++ b/include/mysql/psi/psi_abi_v1.h.pp
@@ -221,6 +221,8 @@ struct PSI_file_locker_state_v1
uint m_flags;
enum PSI_file_operation m_operation;
struct PSI_file *m_file;
+ const char *m_name;
+ void *m_class;
struct PSI_thread *m_thread;
size_t m_number_of_bytes;
ulonglong m_timer_start;
@@ -243,6 +245,7 @@ struct PSI_digest_storage
{
my_bool m_full;
int m_byte_count;
+ uint m_charset_number;
unsigned char m_token_array[1024];
};
typedef struct PSI_digest_storage PSI_digest_storage;
@@ -278,6 +281,8 @@ struct PSI_statement_locker_state_v1
ulong m_sort_rows;
ulong m_sort_scan;
PSI_digest_locker_state m_digest_state;
+ char m_schema_name[(64 * 3)];
+ uint m_schema_name_length;
};
struct PSI_socket_locker_state_v1
{
@@ -318,7 +323,8 @@ typedef struct PSI_cond* (*init_cond_v1_t)
(PSI_cond_key key, const void *identity);
typedef void (*destroy_cond_v1_t)(struct PSI_cond *cond);
typedef struct PSI_socket* (*init_socket_v1_t)
- (PSI_socket_key key, const my_socket *fd);
+ (PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len);
typedef void (*destroy_socket_v1_t)(struct PSI_socket *socket);
typedef struct PSI_table_share* (*get_table_share_v1_t)
(my_bool temporary, struct TABLE_SHARE *share);
@@ -340,9 +346,9 @@ typedef int (*spawn_thread_v1_t)(PSI_thread_key key,
const pthread_attr_t *attr,
void *(*start_routine)(void*), void *arg);
typedef struct PSI_thread* (*new_thread_v1_t)
- (PSI_thread_key key, const void *identity, ulong thread_id);
+ (PSI_thread_key key, const void *identity, ulonglong thread_id);
typedef void (*set_thread_id_v1_t)(struct PSI_thread *thread,
- unsigned long id);
+ ulonglong id);
typedef struct PSI_thread* (*get_thread_v1_t)(void);
typedef void (*set_thread_user_v1_t)(const char *user, int user_len);
typedef void (*set_thread_user_host_v1_t)(const char *user, int user_len,
@@ -420,9 +426,10 @@ typedef struct PSI_table_locker* (*start_table_lock_wait_v1_t)
ulong flags,
const char *src_file, uint src_line);
typedef void (*end_table_lock_wait_v1_t)(struct PSI_table_locker *locker);
-typedef struct PSI_file* (*start_file_open_wait_v1_t)
+typedef void (*start_file_open_wait_v1_t)
(struct PSI_file_locker *locker, const char *src_file, uint src_line);
-typedef void (*end_file_open_wait_v1_t)(struct PSI_file_locker *locker);
+typedef struct PSI_file* (*end_file_open_wait_v1_t)
+ (struct PSI_file_locker *locker, void *result);
typedef void (*end_file_open_wait_and_bind_to_descriptor_v1_t)
(struct PSI_file_locker *locker, File file);
typedef void (*start_file_wait_v1_t)
@@ -430,12 +437,16 @@ typedef void (*start_file_wait_v1_t)
const char *src_file, uint src_line);
typedef void (*end_file_wait_v1_t)
(struct PSI_file_locker *locker, size_t count);
+typedef void (*start_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, const char *src_file, uint src_line);
+typedef void (*end_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, int rc);
typedef void (*start_stage_v1_t)
(PSI_stage_key key, const char *src_file, int src_line);
typedef void (*end_stage_v1_t) (void);
typedef struct PSI_statement_locker* (*get_thread_statement_locker_v1_t)
(struct PSI_statement_locker_state_v1 *state,
- PSI_statement_key key);
+ PSI_statement_key key, const void *charset);
typedef struct PSI_statement_locker* (*refine_statement_v1_t)
(struct PSI_statement_locker *locker,
PSI_statement_key key);
@@ -499,6 +510,8 @@ typedef struct PSI_digest_locker * (*digest_start_v1_t)
(struct PSI_statement_locker *locker);
typedef struct PSI_digest_locker* (*digest_add_token_v1_t)
(struct PSI_digest_locker *locker, uint token, struct OPAQUE_LEX_YYSTYPE *yylval);
+typedef int (*set_thread_connect_attrs_v1_t)(const char *buffer, uint length,
+ const void *from_cs);
struct PSI_v1
{
register_mutex_v1_t register_mutex;
@@ -566,6 +579,8 @@ struct PSI_v1
end_file_open_wait_and_bind_to_descriptor;
start_file_wait_v1_t start_file_wait;
end_file_wait_v1_t end_file_wait;
+ start_file_close_wait_v1_t start_file_close_wait;
+ end_file_close_wait_v1_t end_file_close_wait;
start_stage_v1_t start_stage;
end_stage_v1_t end_stage;
get_thread_statement_locker_v1_t get_thread_statement_locker;
@@ -596,6 +611,7 @@ struct PSI_v1
set_socket_thread_owner_v1_t set_socket_thread_owner;
digest_start_v1_t digest_start;
digest_add_token_v1_t digest_add_token;
+ set_thread_connect_attrs_v1_t set_thread_connect_attrs;
};
typedef struct PSI_v1 PSI;
typedef struct PSI_mutex_info_v1 PSI_mutex_info;
diff --git a/include/mysql/service_debug_sync.h b/include/mysql/service_debug_sync.h
index bb1202c5e63..eee8e6bbe96 100644
--- a/include/mysql/service_debug_sync.h
+++ b/include/mysql/service_debug_sync.h
@@ -339,9 +339,16 @@ extern void (*debug_sync_C_callback_ptr)(MYSQL_THD, const char *, size_t);
if (debug_sync_service) \
debug_sync_service(thd, STRING_WITH_LEN(name)); \
} while(0)
+
+#define DEBUG_SYNC_C_IF_THD(thd, name) \
+ do { \
+ if (debug_sync_service && thd) \
+ debug_sync_service((MYSQL_THD) thd, STRING_WITH_LEN(name)); \
+ } while(0)
#else
-#define DEBUG_SYNC(thd,name) do { } while(0)
-#endif
+#define DEBUG_SYNC(thd,name) do { } while(0)
+#define DEBUG_SYNC_C_IF_THD(thd, _sync_point_name_) do { } while(0)
+#endif /* defined(ENABLED_DEBUG_SYNC) */
/* compatibility macro */
#define DEBUG_SYNC_C(name) DEBUG_SYNC(NULL, name)
diff --git a/include/mysql/service_my_plugin_log.h b/include/mysql/service_my_plugin_log.h
new file mode 100644
index 00000000000..0cf7817573c
--- /dev/null
+++ b/include/mysql/service_my_plugin_log.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; version 2 of the
+ License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+/**
+ @file
+ This service provides functions to report error conditions and log to
+ mysql error log.
+*/
+
+#ifndef MYSQL_SERVICE_MY_PLUGIN_LOG_INCLUDED
+#define MYSQL_SERVICE_MY_PLUGIN_LOG_INCLUDED
+
+#ifndef MYSQL_ABI_CHECK
+#include <stdarg.h>
+#endif
+
+/* keep in sync with the loglevel enum in my_sys.h */
+enum plugin_log_level
+{
+ MY_ERROR_LEVEL,
+ MY_WARNING_LEVEL,
+ MY_INFORMATION_LEVEL
+};
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct my_plugin_log_service
+{
+ /** write a message to the log */
+ int (*my_plugin_log_message)(MYSQL_PLUGIN *, enum plugin_log_level, const char *, ...);
+} *my_plugin_log_service;
+
+#ifdef MYSQL_DYNAMIC_PLUGIN
+
+#define my_plugin_log_message my_plugin_log_service->my_plugin_log_message
+
+#else
+
+int my_plugin_log_message(MYSQL_PLUGIN *plugin, enum plugin_log_level level,
+ const char *format, ...);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/mysql_com.h b/include/mysql_com.h
index 63b95bb5295..f38cc5ed52e 100644
--- a/include/mysql_com.h
+++ b/include/mysql_com.h
@@ -45,6 +45,7 @@
#define TABLE_COMMENT_MAXLEN 2048
#define COLUMN_COMMENT_MAXLEN 1024
#define INDEX_COMMENT_MAXLEN 1024
+#define TABLE_PARTITION_COMMENT_MAXLEN 1024
/*
USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
@@ -119,13 +120,19 @@ enum enum_server_command
#define BINCMP_FLAG 131072 /* Intern: Used by sql_yacc */
#define GET_FIXED_FIELDS_FLAG (1 << 18) /* Used to get fields in item tree */
#define FIELD_IN_PART_FUNC_FLAG (1 << 19)/* Field part of partition func */
-#define FIELD_IN_ADD_INDEX (1<< 20) /* Intern: Field used in ADD INDEX */
+
+/**
+ Intern: Field in TABLE object for new version of altered table,
+ which participates in a newly added index.
+*/
+#define FIELD_IN_ADD_INDEX (1 << 20)
#define FIELD_IS_RENAMED (1<< 21) /* Intern: Field is being renamed */
-#define FIELD_FLAGS_STORAGE_MEDIA 22 /* Field storage media, bit 22-23,
- reserved by MySQL Cluster */
-#define FIELD_FLAGS_COLUMN_FORMAT 24 /* Field column format, bit 24-25,
- reserved by MySQL Cluster */
-#define HAS_EXPLICIT_VALUE (1 << 26) /* An INSERT/UPDATE operation supplied
+#define FIELD_FLAGS_STORAGE_MEDIA 22 /* Field storage media, bit 22-23 */
+#define FIELD_FLAGS_STORAGE_MEDIA_MASK (3 << FIELD_FLAGS_STORAGE_MEDIA)
+#define FIELD_FLAGS_COLUMN_FORMAT 24 /* Field column format, bit 24-25 */
+#define FIELD_FLAGS_COLUMN_FORMAT_MASK (3 << FIELD_FLAGS_COLUMN_FORMAT)
+#define FIELD_IS_DROPPED (1<< 26) /* Intern: Field is being dropped */
+#define HAS_EXPLICIT_VALUE (1 << 27) /* An INSERT/UPDATE operation supplied
an explicit default value */
#define REFRESH_GRANT (1UL << 0) /* Refresh grant tables */
@@ -154,12 +161,12 @@ enum enum_server_command
#define REFRESH_QUERY_CACHE_FREE (1UL << 17) /* pack query cache */
#define REFRESH_DES_KEY_FILE (1UL << 18)
#define REFRESH_USER_RESOURCES (1UL << 19)
+#define REFRESH_FOR_EXPORT (1UL << 20) /* FLUSH TABLES ... FOR EXPORT */
-#define REFRESH_TABLE_STATS (1UL << 20) /* Refresh table stats hash table */
-#define REFRESH_INDEX_STATS (1UL << 21) /* Refresh index stats hash table */
-#define REFRESH_USER_STATS (1UL << 22) /* Refresh user stats hash table */
-#define REFRESH_CLIENT_STATS (1UL << 23) /* Refresh client stats hash table */
-
+#define REFRESH_TABLE_STATS (1UL << 27) /* Refresh table stats hash table */
+#define REFRESH_INDEX_STATS (1UL << 28) /* Refresh index stats hash table */
+#define REFRESH_USER_STATS (1UL << 29) /* Refresh user stats hash table */
+#define REFRESH_CLIENT_STATS (1UL << 30) /* Refresh client stats hash table */
#define REFRESH_FAST (1UL << 31) /* Intern flag */
#define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */
@@ -183,8 +190,15 @@ enum enum_server_command
#define CLIENT_PS_MULTI_RESULTS (1UL << 18) /* Multi-results in PS-protocol */
#define CLIENT_PLUGIN_AUTH (1UL << 19) /* Client supports plugin authentication */
-#define CLIENT_PROGRESS (1UL << 29) /* Client support progress indicator */
+#define CLIENT_PLUGIN_AUTH (1UL << 19) /* Client supports plugin authentication */
+#define CLIENT_CONNECT_ATTRS (1UL << 20) /* Client supports connection attributes */
+/* Enable authentication response packet to be larger than 255 bytes. */
+#define CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA (1UL << 21)
+/* Don't close the connection for a connection with expired password. */
+#define CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS (1UL << 22)
+
+#define CLIENT_PROGRESS (1UL << 29) /* Client support progress indicator */
#define CLIENT_SSL_VERIFY_SERVER_CERT (1UL << 30)
/*
It used to be that if mysql_real_connect() failed, it would delete any
@@ -229,6 +243,12 @@ enum enum_server_command
CLIENT_PLUGIN_AUTH)
/*
+ To be added later:
+ CLIENT_CONNECT_ATTRS, CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA,
+ CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS
+*/
+
+/*
Switch off the flags that are optional and depending on build flags
If any of the optional flags is supported by the build it will be switched
on before sending to the client during the connection handshake.
diff --git a/include/password.h b/include/password.h
index 082f917e7c0..5dfea533546 100644
--- a/include/password.h
+++ b/include/password.h
@@ -24,6 +24,8 @@ void my_make_scrambled_password_323(char *to, const char *password,
size_t pass_len);
void my_make_scrambled_password(char *to, const char *password,
size_t pass_len);
+void my_make_scrambled_password_sha1(char *to, const char *password,
+ size_t pass_len);
void hash_password(ulong *result, const char *password, uint password_len);
diff --git a/include/sha1.h b/include/sha1.h
index c3469333c27..b20cc8f5026 100644
--- a/include/sha1.h
+++ b/include/sha1.h
@@ -18,88 +18,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/*
- This is the header file for code which implements the Secure
- Hashing Algorithm 1 as defined in FIPS PUB 180-1 published
- April 17, 1995.
-
- Many of the variable names in this code, especially the
- single character names, were used because those were the names
- used in the publication.
-
- Please read the file sha1.c for more information.
-
- Modified 2002 by Peter Zaitsev to better follow MySQL standards
-
- Original Source from: http://www.faqs.org/rfcs/rfc3174.html
-
- Copyright (C) The Internet Society (2001). All Rights Reserved.
-
- This document and translations of it may be copied and furnished to
- others, and derivative works that comment on or otherwise explain it
- or assist in its implementation may be prepared, copied, published
- and distributed, in whole or in part, without restriction of any
- kind, provided that the above copyright notice and this paragraph are
- included on all such copies and derivative works. However, this
- document itself may not be modified in any way, such as by removing
- the copyright notice or references to the Internet Society or other
- Internet organizations, except as needed for the purpose of
- developing Internet standards in which case the procedures for
- copyrights defined in the Internet Standards process must be
- followed, or as required to translate it into languages other than
- English.
-
- The limited permissions granted above are perpetual and will not be
- revoked by the Internet Society or its successors or assigns.
-
- This document and the information contained herein is provided on an
- "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
- TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
- BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
- HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
- MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
- Acknowledgement
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-*/
-
-
-enum sha_result_codes
-{
- SHA_SUCCESS = 0,
- SHA_NULL, /* Null pointer parameter */
- SHA_INPUT_TOO_LONG, /* input data too long */
- SHA_STATE_ERROR /* called Input after Result */
-};
-
#define SHA1_HASH_SIZE 20 /* Hash size in bytes */
-/*
- This structure will hold context information for the SHA-1
- hashing operation
-*/
-
-typedef struct SHA1_CONTEXT
-{
- ulonglong Length; /* Message length in bits */
- uint32 Intermediate_Hash[SHA1_HASH_SIZE/4]; /* Message Digest */
- int Computed; /* Is the digest computed? */
- int Corrupted; /* Is the message digest corrupted? */
- int16 Message_Block_Index; /* Index into message block array */
- uint8 Message_Block[64]; /* 512-bit message blocks */
-} SHA1_CONTEXT;
-
-/*
- Function Prototypes
-*/
-
C_MODE_START
-int mysql_sha1_reset(SHA1_CONTEXT*);
-int mysql_sha1_input(SHA1_CONTEXT*, const uint8 *, unsigned int);
-int mysql_sha1_result(SHA1_CONTEXT* , uint8 Message_Digest[SHA1_HASH_SIZE]);
-
+void compute_sha1_hash(uint8 *digest, const char *buf, int len);
+void compute_sha1_hash_multi(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2);
C_MODE_END
#endif /* SHA__INCLUDED */
diff --git a/include/thread_pool_priv.h b/include/thread_pool_priv.h
index 78526894e21..95f7cd95493 100644
--- a/include/thread_pool_priv.h
+++ b/include/thread_pool_priv.h
@@ -49,7 +49,6 @@ void thd_set_killed(THD *thd);
void thd_clear_errors(THD *thd);
void thd_set_thread_stack(THD *thd, char *stack_start);
void thd_lock_thread_count(THD *thd);
-void thd_unlock_thread_count(THD *thd);
void thd_close_connection(THD *thd);
THD *thd_get_current_thd();
void thd_lock_data(THD *thd);
diff --git a/libevent/CMakeLists.txt b/libevent/CMakeLists.txt
new file mode 100644
index 00000000000..ea50bab2530
--- /dev/null
+++ b/libevent/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Common defines and includes
+IF(WITH_INNODB_MEMCACHED AND UNIX)
+
+ADD_DEFINITIONS(-DHAVE_CONFIG_H)
+INCLUDE_DIRECTORIES(${LIBEVENT_INCLUDE_DIR}/compat/sys
+ ${LIBEVENT_INCLUDE_DIR})
+
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_SHARED_LIBRARY_C_FLAGS} -I${LIBEVENT_INCLUDE_DIR}")
+
+SET(LIBEVENT_CORE_SOURCES
+ event.h
+ event-internal.h
+ evutil.h
+ log.h
+ event.c
+ buffer.c
+ evbuffer.c
+ log.c
+ evutil.c)
+
+SET(LIBEVENT_EXTRA_SOURCES
+ event_tagging.c
+ http.c
+ evhttp.h
+ http-internal.h
+ evdns.h
+ evrpc.c
+ evrpc.h
+ evrpc-internal.h
+ strlcpy.c
+ strlcpy-internal.h)
+
+IF(HAVE_SIGNAL_H)
+ SET(LIBEVENT_SIGNAL_SOURCES signal.c)
+ENDIF()
+
+IF(HAVE_POLL_H)
+ SET(LIBEVENT_POLL_SOURCES poll.c)
+ENDIF()
+
+IF(HAVE_SELECT)
+ SET(LIBEVENT_SELECT_SOURCE select.c)
+ENDIF()
+
+IF(HAVE_SYS_EPOLL_H)
+ SET(LIBEVENT_EPOLL_SOURCES epoll.c epoll_sub.c)
+ENDIF()
+
+IF(HAVE_SYS_DEVPOLL_H)
+ SET(LIBEVENT_DEVPOLL_SOURCES devpoll.c)
+ENDIF()
+
+IF(HAVE_EVENT_PORTS)
+ SET(LIBEVENT_EVPORT_SOURCES evport.c)
+ENDIF()
+
+IF(HAVE_WORKING_KQUEUE)
+ SET(LIBEVENT_KQUEUE_SOURCES kqueue.c)
+ENDIF()
+
+ADD_LIBRARY(event_share SHARED ${LIBEVENT_CORE_SOURCES} ${LIBEVENT_EXTRA_SOURCES} ${LIBEVENT_SIGNAL_SOURCES} ${LIBEVENT_POLL_SOURCES} ${LIBEVENT_SELECT_SOURCE} ${LIBEVENT_EPOLL_SOURCES} ${LIBEVENT_DEVPOLL_SOURCES} ${LIBEVENT_EVPORT_SOURCES} ${LIBEVENT_KQUEUE_SOURCES})
+
+ADD_LIBRARY(event STATIC ${LIBEVENT_CORE_SOURCES} ${LIBEVENT_EXTRA_SOURCES} ${LIBEVENT_SIGNAL_SOURCES} ${LIBEVENT_POLL_SOURCES} ${LIBEVENT_SELECT_SOURCE} ${LIBEVENT_EPOLL_SOURCES} ${LIBEVENT_DEVPOLL_SOURCES} ${LIBEVENT_EVPORT_SOURCES} ${LIBEVENT_KQUEUE_SOURCES})
+ENDIF()
+
diff --git a/libevent/ChangeLog b/libevent/ChangeLog
new file mode 100644
index 00000000000..2435c1f15d8
--- /dev/null
+++ b/libevent/ChangeLog
@@ -0,0 +1,190 @@
+Changes in 1.4.12-stable:
+ o Try to contain degree of failure when running on a win32 version so heavily firewalled that we can't fake a socketpair.
+ o Fix an obscure timing-dependent, allocator-dependent crash in the evdns code.
+ o Use __VA_ARGS__ syntax for varargs macros in event_rpcgen when compiler is not GCC.
+ o Activate fd events in a pseudorandom order with O(N) backends, so that we don't systematically favor low fds (select) or earlier-added fds (poll, win32).
+ o Fix another pair of fencepost bugs in epoll.c. [Patch from Adam Langley.]
+ o Do not break evdns connections to nameservers when our IP changes.
+ o Set truncated flag correctly in evdns server replies.
+ o Disable strict aliasing with GCC: our code is not compliant with it.
+
+Changes in 1.4.11-stable:
+ o Fix a bug when removing a timeout from the heap. [Patch from Marko Kreen]
+ o Remove the limit on size of HTTP headers by removing static buffers.
+ o Fix a nasty dangling pointer bug in epoll.c that could occur after epoll_recalc(). [Patch from Kevin Springborn]
+ o Distribute Win32-Code/event-config.h, not ./event-config.h
+
+Changes in 1.4.10-stable:
+ o clean up buffered http connection data on reset; reported by Brian O'Kelley
+ o bug fix and potential race condition in signal handling; from Alexander Drozdov
+ o rename the Solaris event ports backend to evport
+ o support compilation on Haiku
+ o fix signal processing when a signal callback delivers a signal; from Alexander Drozdov
+ o const-ify some arguments to evdns functions.
+ o off-by-one error in epoll_recalc; reported by Victor Goya
+ o include Doxyfile in tar ball; from Jeff Garzik
+ o correctly parse queries with encoded \r, \n or + characters
+
+Changes in 1.4.9-stable:
+ o event_add would not return error for some backends; from Dean McNamee
+ o Clear the timer cache on entering the event loop; reported by Victor Chang
+ o Only bind the socket on connect when a local address has been provided; reported by Alejo Sanchez
+ o Allow setting of local port for evhttp connections to support millions of connections from a single system; from Richard Jones.
+ o Clear the timer cache when leaving the event loop; reported by Robin Haberkorn
+ o Fix a typo in setting the global event base; reported by lance.
+ o Fix a memory leak when reading multi-line headers
+ o Fix a memory leak by not running explicit close detection for server connections
+
+Changes in 1.4.8-stable:
+ o Match the query in DNS replies to the query in the request; from Vsevolod Stakhov.
+ o Fix a merge problem in which name_from_addr returned pointers to the stack; found by Jiang Hong.
+ o Do not remove Accept-Encoding header
+
+Changes in 1.4.7-stable:
+ o Fix a bug where headers arriving in multiple packets were not parsed; fix from Jiang Hong; test by me.
+
+Changes in 1.4.6-stable:
+ o evutil.h now includes <stdarg.h> directly
+ o switch all uses of [v]snprintf over to evutil
+ o Correct handling of trailing headers in chunked replies; from Scott Lamb.
+ o Support multi-line HTTP headers; based on a patch from Moshe Litvin
+ o Reject negative Content-Length headers; anonymous bug report
+ o Detect CLOCK_MONOTONIC at runtime for evdns; anonymous bug report
+ o Fix a bug where deleting signals with the kqueue backend would cause subsequent adds to fail
+ o Support multiple events listening on the same signal; make signals regular events that go on the same event queue; problem report by Alexander Drozdov.
+ o Deal with evbuffer_read() returning -1 on EINTR|EAGAIN; from Adam Langley.
+ o Fix a bug in which the DNS server would incorrectly set the type of a cname reply to a.
+ o Fix a bug where setting the timeout on a bufferevent would not take effect if the event was already pending.
+ o Fix a memory leak when using signals for some event bases; reported by Alexander Drozdov.
+ o Add libevent.vcproj file to distribution to help with Windows build.
+ o Fix a problem with epoll() and reinit; problem report by Alexander Drozdov.
+ o Fix off-by-one errors in devpoll; from Ian Bell
+ o Make event_add not change any state if it fails; reported by Ian Bell.
+ o Do not warn on accept when errno is either EAGAIN or EINTR
+
+Changes in 1.4.5-stable:
+ o Fix connection keep-alive behavior for HTTP/1.0
+ o Fix use of freed memory in event_reinit; pointed out by Peter Postma
+ o Constify struct timeval * where possible; pointed out by Forest Wilkinson
+ o allow min_heap_erase to be called on removed members; from liusifan.
+ o Rename INPUT and OUTPUT to EVRPC_INPUT and EVRPC_OUTPUT. Retain INPUT/OUTPUT aliases on non-win32 platforms for backwards compatibility.
+ o Do not use SO_REUSEADDR when connecting
+ o Fix Windows build
+ o Fix a bug in event_rpcgen when generated fixed-sized entries
+
+Changes in 1.4.4-stable:
+ o Correct the documentation on buffer printf functions.
+ o Don't warn on unimplemented epoll_create(): this isn't a problem, just a reason to fall back to poll or select.
+ o Correctly handle timeouts larger than 35 minutes on Linux with epoll.c. This is probably a kernel defect, but we'll have to support old kernels anyway even if it gets fixed.
+ o Fix a potential stack corruption bug in tagging on 64-bit CPUs.
+ o expose bufferevent_setwatermark via header files and fix high watermark on read
+ o fix a bug in bufferevent read water marks and add a test for them
+ o introduce bufferevent_setcb and bufferevent_setfd to allow better manipulation of bufferevents
+ o use libevent's internal timercmp on all platforms, to avoid bugs on old platforms where timercmp(a,b,<=) is buggy.
+ o reduce system calls for getting current time by caching it.
+ o fix evhttp_bind_socket() so that multiple sockets can be bound by the same http server.
+ o Build test directory correctly with CPPFLAGS set.
+ o Fix build under Visual C++ 2005.
+ o Expose evhttp_accept_socket() API.
+ o Merge windows gettimeofday() replacement into a new evutil_gettimeofday() function.
+ o Fix autoconf script behavior on IRIX.
+ o Make sure winsock2.h include always comes before windows.h include.
+
+Changes in 1.4.3-stable:
+ o include Content-Length in reply for HTTP/1.0 requests with keep-alive
+ o Patch from Tani Hosokawa: make some functions in http.c threadsafe.
+ o Do not free the kqop file descriptor in other processes, also allow it to be 0; from Andrei Nigmatulin
+ o make event_rpcgen.py generate code include event-config.h; reported by Sam Banks.
+ o make event methods static so that they are not exported; from Andrei Nigmatulin
+ o make RPC replies use application/octet-stream as mime type
+ o do not delete uninitialized timeout event in evdns
+
+Changes in 1.4.2-rc:
+ o remove pending timeouts on event_base_free()
+ o also check EAGAIN for Solaris' event ports; from W.C.A. Wijngaards
+ o devpoll and evport need reinit; tested by W.C.A Wijngaards
+ o event_base_get_method; from Springande Ulv
+ o Send CRLF after each chunk in HTTP output, for compliance with RFC2626. Patch from "propanbutan". Fixes bug 1894184.
+ o Add a int64_t parsing function, with unit tests, so we can apply Scott Lamb's fix to allow large HTTP values.
+ o Use a 64-bit field to hold HTTP content-lengths. Patch from Scott Lamb.
+ o Allow regression code to build even without Python installed
+ o remove NDEBUG ifdefs from evdns.c
+ o update documentation of event_loop and event_base_loop; from Tani Hosokawa.
+ o detect integer types properly on platforms without stdint.h
+ o Remove "AM_MAINTAINER_MODE" declaration in configure.in: now makefiles and configure should get re-generated automatically when Makefile.am or configure.in changes.
+ o do not insert event into list when evsel->add fails
+
+Changes in 1.4.1-beta:
+ o free minheap on event_base_free(); from Christopher Layne
+ o debug cleanups in signal.c; from Christopher Layne
+ o provide event_base_new() that does not set the current_base global
+ o bufferevent_write now uses a const source argument; report from Charles Kerr
+ o better documentation for event_base_loopexit; from Scott Lamb.
+ o Make kqueue have the same behavior as other backends when a signal is caught between event_add() and event_loop(). Previously, it would catch and ignore such signals.
+ o Make kqueue restore signal handlers correctly when event_del() is called.
+ o provide event_reinit() to reinitialize an event_base after fork
+ o small improvements to evhttp documentation
+ o always generate Date and Content-Length headers for HTTP/1.1 replies
+ o set the correct event base for HTTP close events
+ o New function, event_{base_}loopbreak. Like event_loopexit, it makes an event loop stop executing and return. Unlike event_loopexit, it keeps subsequent pending events from getting executed. Patch from Scott Lamb
+ o Removed obsoleted recalc code
+ o pull setters/getters out of RPC structures into a base class to which we just need to store a pointer; this reduces the memory footprint of these structures.
+ o fix a bug with event_rpcgen for integers
+ o move EV_PERSIST handling out of the event backends
+ o support for 32-bit tag numbers in rpc structures; this is wire compatible, but changes the API slightly.
+ o prefix {encode,decode}_tag functions with evtag to avoid collisions
+ o Correctly handle DNS replies with no answers set (Fixes bug 1846282)
+ o The configure script now takes an --enable-gcc-warnigns option that turns on many optional gcc warnings. (Nick has been building with these for a while, but they might be useful to other developers.)
+ o When building with GCC, use the "format" attribute to verify type correctness of calls to printf-like functions.
+ o removed linger from http server socket; reported by Ilya Martynov
+ o allow \r or \n individually to separate HTTP headers instead of the standard "\r\n"; from Charles Kerr.
+ o demote most http warnings to debug messages
+ o Fix Solaris compilation; from Magne Mahre
+ o Add a "Date" header to HTTP responses, as required by HTTP 1.1.
+ o Support specifying the local address of an evhttp_connection using set_local_address
+ o Fix a memory leak in which failed HTTP connections would not free the request object
+ o Make adding of array members in event_rpcgen more efficient, by doubling memory allocation
+ o Fix a memory leak in the DNS server
+ o Fix compilation when DNS_USE_OPENSSL_FOR_ID is enabled
+ o Fix buffer size and string generation in evdns_resolve_reverse_ipv6().
+ o Respond to nonstandard DNS queries with "NOTIMPL" rather than by ignoring them.
+ o In DNS responses, the CD flag should be preserved, not the TC flag.
+ o Fix http.c to compile properly with USE_DEBUG; from Christopher Layne
+ o Handle NULL timeouts correctly on Solaris; from Trond Norbye
+ o Recalculate pending events properly when reallocating event array on Solaris; from Trond Norbye
+ o Add Doxygen documentation to header files; from Mark Heily
+ o Add a evdns_set_transaction_id_fn() function to override the default
+ transaction ID generation code.
+ o Add an evutil module (with header evutil.h) to implement our standard cross-platform hacks, on the theory that somebody else would like to use them too.
+ o Fix signals implementation on windows.
+ o Fix http module on windows to close sockets properly.
+ o Make autogen.sh script run correctly on systems where /bin/sh isn't bash. (Patch from Trond Norbye, rewritten by Hagne Mahre and then Hannah Schroeter.)
+ o Skip calling gettime() in timeout_process if we are not in fact waiting for any events. (Patch from Trond Norbye)
+ o Make test subdirectory compile under mingw.
+ o Fix win32 buffer.c behavior so that it is correct for sockets (which do not like ReadFile and WriteFile).
+ o Make the test.sh script run unit tests for the evpoll method.
+ o Make the entire evdns.h header enclosed in "extern C" as appropriate.
+ o Fix implementation of strsep on platforms that lack it
+ o Fix implementation of getaddrinfo on platforms that lack it; mainly, this will make Windows http.c work better. Original patch by Lubomir Marinov.
+ o Fix evport implementation: port_disassociate called on unassociated events resulting in bogus errors; more efficient memory management; from Trond Norbye and Prakash Sangappa
+ o support for hooks on rpc input and output; can be used to implement rpc independent processing such as compression or authentication.
+ o use a min heap instead of a red-black tree for timeouts; as a result finding the min is a O(1) operation now; from Maxim Yegorushkin
+ o associate an event base with an rpc pool
+ o added two additional libraries: libevent_core and libevent_extra in addition to the regular libevent. libevent_core contains only the event core whereas libevent_extra contains dns, http and rpc support
+ o Begin using libtool's library versioning support correctly. If we don't mess up, this will more or less guarantee binaries linked against old versions of libevent continue working when we make changes to libevent that do not break backward compatibility.
+ o Fix evhttp.h compilation when TAILQ_ENTRY is not defined.
+ o Small code cleanups in epoll_dispatch().
+ o Increase the maximum number of addresses read from a packet in evdns to 32.
+ o Remove support for the rtsig method: it hasn't compiled for a while, and nobody seems to miss it very much. Let us know if there's a good reason to put it back in.
+ o Rename the "class" field in evdns_server_request to dns_question_class, so that it won't break compilation under C++. Use a macro so that old code won't break. Mark the macro as deprecated.
+ o Fix DNS unit tests so that having a DNS server with broken IPv6 support is no longer cause for aborting the unit tests.
+ o Make event_base_free() succeed even if there are pending non-internal events on a base. This may still leak memory and fds, but at least it no longer crashes.
+ o Post-process the config.h file into a new, installed event-config.h file that we can install, and whose macros will be safe to include in header files.
+ o Remove the long-deprecated acconfig.h file.
+ o Do not require #include <sys/types.h> before #include <event.h>.
+ o Add new evutil_timer* functions to wrap (or replace) the regular timeval manipulation functions.
+ o Fix many build issues when using the Microsoft C compiler.
+ o Remove a bash-ism in autogen.sh
+ o When calling event_del on a signal, restore the signal handler's previous value rather than setting it to SIG_DFL. Patch from Christopher Layne.
+ o Make the logic for active events work better with internal events; patch from Christopher Layne.
+ o We do not need to specially remove a timeout before calling event_del; patch from Christopher Layne.
diff --git a/libevent/Doxyfile b/libevent/Doxyfile
new file mode 100644
index 00000000000..77f6de89b46
--- /dev/null
+++ b/libevent/Doxyfile
@@ -0,0 +1,230 @@
+# Doxyfile 1.5.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = libevent
+
+# Place all output under 'doxygen/'
+
+OUTPUT_DIRECTORY = doxygen/
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = YES
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = event.h evdns.h evhttp.h evrpc.h
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = YES
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = TAILQ_ENTRY RB_ENTRY _EVENT_DEFINED_TQENTRY
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
diff --git a/libevent/Makefile.am b/libevent/Makefile.am
new file mode 100644
index 00000000000..8d9d7520373
--- /dev/null
+++ b/libevent/Makefile.am
@@ -0,0 +1,124 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+# This is the point release for libevent. It shouldn't include any
+# a/b/c/d/e notations.
+RELEASE = 1.4
+
+# This is the version info for the libevent binary API. It has three
+# numbers:
+# Current -- the number of the binary API that we're implementing
+# Revision -- which iteration of the implementation of the binary
+# API are we supplying?
+# Age -- How many previous binary API versions do we also
+# support?
+#
+# If we release a new version that does not change the binary API,
+# increment Revision.
+#
+# If we release a new version that changes the binary API, but does
+# not break programs compiled against the old binary API, increment
+# Current and Age. Set Revision to 0, since this is the first
+# implementation of the new API.
+#
+# Otherwise, we're changing the binary API and breaking backward
+# compatibility with old binaries. Increment Current. Set Age to 0,
+# since we're backward compatible with no previous APIs. Set Revision
+# to 0 too.
+
+# History:
+# Libevent 1.4.1 was 2:0:0
+# Libevent 1.4.2 should be 3:0:0
+# Libevent 1.4.5 is 3:0:1 (we forgot to increment in the past)
+VERSION_INFO = 3:3:1
+
+bin_SCRIPTS = event_rpcgen.py
+
+EXTRA_DIST = autogen.sh event.h event-internal.h log.h evsignal.h evdns.3 \
+ evrpc.h evrpc-internal.h min_heap.h \
+ event.3 \
+ Doxyfile \
+ kqueue.c epoll_sub.c epoll.c select.c poll.c signal.c \
+ evport.c devpoll.c event_rpcgen.py \
+ sample/Makefile.am sample/Makefile.in sample/event-test.c \
+ sample/signal-test.c sample/time-test.c \
+ test/Makefile.am test/Makefile.in test/bench.c test/regress.c \
+ test/test-eof.c test/test-weof.c test/test-time.c \
+ test/test-init.c test/test.sh \
+ compat/sys/queue.h compat/sys/_time.h \
+ WIN32-Code/config.h \
+ WIN32-Code/event-config.h \
+ WIN32-Code/win32.c \
+ WIN32-Code/tree.h \
+ WIN32-Prj/event_test/event_test.dsp \
+ WIN32-Prj/event_test/test.txt WIN32-Prj/libevent.dsp \
+ WIN32-Prj/libevent.dsw WIN32-Prj/signal_test/signal_test.dsp \
+ WIN32-Prj/time_test/time_test.dsp WIN32-Prj/regress/regress.vcproj \
+ WIN32-Prj/libevent.sln WIN32-Prj/libevent.vcproj
+
+lib_LTLIBRARIES = libevent.la libevent_core.la libevent_extra.la
+
+if BUILD_WIN32
+
+SUBDIRS = . sample
+SYS_LIBS = -lws2_32
+SYS_SRC = WIN32-Code/win32.c
+SYS_INCLUDES = -IWIN32-Code
+
+else
+
+SUBDIRS = . sample test
+SYS_LIBS =
+SYS_SRC =
+SYS_INCLUDES =
+
+endif
+
+BUILT_SOURCES = event-config.h
+
+event-config.h: config.h
+ echo '/* event-config.h' > $@
+ echo ' * Generated by autoconf; post-processed by libevent.' >> $@
+ echo ' * Do not edit this file.' >> $@
+ echo ' * Do not rely on macros in this file existing in later versions.'>> $@
+ echo ' */' >> $@
+ echo '#ifndef _EVENT_CONFIG_H_' >> $@
+ echo '#define _EVENT_CONFIG_H_' >> $@
+
+ sed -e 's/#define /#define _EVENT_/' \
+ -e 's/#undef /#undef _EVENT_/' \
+ -e 's/#ifndef /#ifndef _EVENT_/' < config.h >> $@
+ echo "#endif" >> $@
+
+CORE_SRC = event.c buffer.c evbuffer.c log.c evutil.c $(SYS_SRC)
+EXTRA_SRC = event_tagging.c http.c evhttp.h http-internal.h evdns.c \
+ evdns.h evrpc.c evrpc.h evrpc-internal.h \
+ strlcpy.c strlcpy-internal.h strlcpy-internal.h
+
+libevent_la_SOURCES = $(CORE_SRC) $(EXTRA_SRC)
+libevent_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+libevent_core_la_SOURCES = $(CORE_SRC)
+libevent_core_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_core_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+libevent_extra_la_SOURCES = $(EXTRA_SRC)
+libevent_extra_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_extra_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+include_HEADERS = event.h evhttp.h evdns.h evrpc.h evutil.h
+
+nodist_include_HEADERS = event-config.h
+
+INCLUDES = -I$(srcdir)/compat $(SYS_INCLUDES)
+
+man_MANS = event.3 evdns.3
+
+verify: libevent.la
+ cd test && make verify
+
+doxygen: FORCE
+ doxygen $(srcdir)/Doxyfile
+FORCE:
+
+DISTCLEANFILES = *~ event-config.h
diff --git a/libevent/README b/libevent/README
new file mode 100644
index 00000000000..b0650392ed4
--- /dev/null
+++ b/libevent/README
@@ -0,0 +1,57 @@
+To build libevent, type
+
+$ ./configure && make
+
+ (If you got libevent from the subversion repository, you will
+ first need to run the included "autogen.sh" script in order to
+ generate the configure script.)
+
+Install as root via
+
+# make install
+
+You can run the regression tests by
+
+$ make verify
+
+Before reporting any problems, please run the regression tests.
+
+To enable low-level tracing, build the library as:
+
+CFLAGS=-DUSE_DEBUG ./configure [...]
+
+Acknowledgements:
+-----------------
+
+The following people have helped with suggestions, ideas, code or
+fixing bugs:
+
+ Alejo
+ Weston Andros Adamson
+ William Ahern
+ Stas Bekman
+ Andrew Danforth
+ Mike Davis
+ Shie Erlich
+ Alexander von Gernler
+ Artur Grabowski
+ Aaron Hopkins
+ Claudio Jeker
+ Scott Lamb
+ Adam Langley
+ Philip Lewis
+ David Libenzi
+ Nick Mathewson
+ Andrey Matveev
+ Richard Nyberg
+ Jon Oberheide
+ Phil Oleson
+ Dave Pacheco
+ Tassilo von Parseval
+ Pierre Phaneuf
+ Jon Poland
+ Bert JW Regeer
+ Dug Song
+ Taral
+
+If I have forgotten your name, please contact me.
diff --git a/libevent/WIN32-Code/event-config.h b/libevent/WIN32-Code/event-config.h
new file mode 100644
index 00000000000..3059080274b
--- /dev/null
+++ b/libevent/WIN32-Code/event-config.h
@@ -0,0 +1,244 @@
+/* event-config.h
+ * Generated by autoconf; post-processed by libevent.
+ * Do not edit this file.
+ * Do not rely on macros in this file existing in later versions.
+ */
+#ifndef _EVENT_CONFIG_H_
+#define _EVENT_CONFIG_H_
+/* config.h. Generated by configure. */
+/* config.h.in. Generated from configure.in by autoheader. */
+
+/* Define if clock_gettime is available in libc */
+/* #undef _EVENT_DNS_USE_CPU_CLOCK_FOR_ID */
+
+/* Define if no secure id variant is available */
+#define _EVENT_DNS_USE_GETTIMEOFDAY_FOR_ID 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef _EVENT_HAVE_CLOCK_GETTIME */
+
+/* Define if /dev/poll is available */
+/* #undef _EVENT_HAVE_DEVPOLL */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+/* #undef _EVENT_HAVE_DLFCN_H */
+
+/* Define if your system supports the epoll system calls */
+/* #undef _EVENT_HAVE_EPOLL */
+
+/* Define to 1 if you have the `epoll_ctl' function. */
+/* #undef _EVENT_HAVE_EPOLL_CTL */
+
+/* Define if your system supports event ports */
+/* #undef _EVENT_HAVE_EVENT_PORTS */
+
+/* Define to 1 if you have the `fcntl' function. */
+/* #undef _EVENT_HAVE_FCNTL */
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define _EVENT_HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+/* #undef _EVENT_HAVE_GETADDRINFO */
+
+/* Define to 1 if you have the `getnameinfo' function. */
+/* #undef _EVENT_HAVE_GETNAMEINFO */
+
+/* Define to 1 if you have the `gettimeofday' function. */
+/* #define _EVENT_HAVE_GETTIMEOFDAY 1 */
+
+/* Define to 1 if you have the `inet_ntop' function. */
+/* #undef _EVENT_HAVE_INET_NTOP */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+/* #undef _EVENT_HAVE_INTTYPES_H 1 */
+
+/* Define to 1 if you have the `kqueue' function. */
+/* #undef _EVENT_HAVE_KQUEUE */
+
+/* Define to 1 if you have the `nsl' library (-lnsl). */
+/* #undef _EVENT_HAVE_LIBNSL */
+
+/* Define to 1 if you have the `resolv' library (-lresolv). */
+/* #undef _EVENT_HAVE_LIBRESOLV */
+
+/* Define to 1 if you have the `rt' library (-lrt). */
+/* #undef _EVENT_HAVE_LIBRT */
+
+/* Define to 1 if you have the `socket' library (-lsocket). */
+/* #undef _EVENT_HAVE_LIBSOCKET */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define _EVENT_HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <netinet/in6.h> header file. */
+/* #undef _EVENT_HAVE_NETINET_IN6_H */
+
+/* Define to 1 if you have the `poll' function. */
+/* #undef _EVENT_HAVE_POLL */
+
+/* Define to 1 if you have the <poll.h> header file. */
+/* #undef _EVENT_HAVE_POLL_H */
+
+/* Define to 1 if you have the `port_create' function. */
+/* #undef _EVENT_HAVE_PORT_CREATE */
+
+/* Define to 1 if you have the <port.h> header file. */
+/* #undef _EVENT_HAVE_PORT_H */
+
+/* Define to 1 if you have the `select' function. */
+/* #undef _EVENT_HAVE_SELECT */
+
+/* Define if F_SETFD is defined in <fcntl.h> */
+/* #undef _EVENT_HAVE_SETFD */
+
+/* Define to 1 if you have the `sigaction' function. */
+/* #undef _EVENT_HAVE_SIGACTION */
+
+/* Define to 1 if you have the `signal' function. */
+#define _EVENT_HAVE_SIGNAL 1
+
+/* Define to 1 if you have the <signal.h> header file. */
+#define _EVENT_HAVE_SIGNAL_H 1
+
+/* Define to 1 if you have the <stdarg.h> header file. */
+#define _EVENT_HAVE_STDARG_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+/* #define _EVENT_HAVE_STDINT_H 1 */
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define _EVENT_HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define _EVENT_HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define _EVENT_HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strlcpy' function. */
+/* #undef _EVENT_HAVE_STRLCPY */
+
+/* Define to 1 if you have the `strsep' function. */
+/* #undef _EVENT_HAVE_STRSEP */
+
+/* Define to 1 if you have the `strtok_r' function. */
+/* #undef _EVENT_HAVE_STRTOK_R */
+
+/* Define to 1 if the system has the type `struct in6_addr'. */
+#define _EVENT_HAVE_STRUCT_IN6_ADDR 1
+
+/* Define to 1 if you have the <sys/devpoll.h> header file. */
+/* #undef _EVENT_HAVE_SYS_DEVPOLL_H */
+
+/* Define to 1 if you have the <sys/epoll.h> header file. */
+/* #undef _EVENT_HAVE_SYS_EPOLL_H */
+
+/* Define to 1 if you have the <sys/event.h> header file. */
+/* #undef _EVENT_HAVE_SYS_EVENT_H */
+
+/* Define to 1 if you have the <sys/ioctl.h> header file. */
+/* #undef _EVENT_HAVE_SYS_IOCTL_H */
+
+/* Define to 1 if you have the <sys/queue.h> header file. */
+/* #undef _EVENT_HAVE_SYS_QUEUE_H */
+
+/* Define to 1 if you have the <sys/select.h> header file. */
+/* #undef _EVENT_HAVE_SYS_SELECT_H */
+
+/* Define to 1 if you have the <sys/socket.h> header file. */
+/* #undef _EVENT_HAVE_SYS_SOCKET_H */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define _EVENT_HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+/* #define _EVENT_HAVE_SYS_TIME_H 1 */
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+/* #define _EVENT_HAVE_SYS_TYPES_H 1 */
+
+/* Define if TAILQ_FOREACH is defined in <sys/queue.h> */
+/* #undef _EVENT_HAVE_TAILQFOREACH */
+
+/* Define if timeradd is defined in <sys/time.h> */
+/* #undef _EVENT_HAVE_TIMERADD */
+
+/* Define if timerclear is defined in <sys/time.h> */
+/* #define _EVENT_HAVE_TIMERCLEAR 1 */
+
+/* Define if timercmp is defined in <sys/time.h> */
+#define _EVENT_HAVE_TIMERCMP 1
+
+/* Define if timerisset is defined in <sys/time.h> */
+#define _EVENT_HAVE_TIMERISSET 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+/* #define _EVENT_HAVE_UNISTD_H 1 */
+
+/* Define to 1 if you have the `vasprintf' function. */
+/* #undef _EVENT_HAVE_VASPRINTF */
+
+/* Define if kqueue works correctly with pipes */
+/* #undef _EVENT_HAVE_WORKING_KQUEUE */
+
+/* Name of package */
+#define _EVENT_PACKAGE "libevent"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define _EVENT_PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define _EVENT_PACKAGE_NAME ""
+
+/* Define to the full name and version of this package. */
+#define _EVENT_PACKAGE_STRING ""
+
+/* Define to the one symbol short name of this package. */
+#define _EVENT_PACKAGE_TARNAME ""
+
+/* Define to the version of this package. */
+#define _EVENT_PACKAGE_VERSION ""
+
+/* Define to 1 if you have the ANSI C header files. */
+#define _EVENT_STDC_HEADERS 1
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#define _EVENT_TIME_WITH_SYS_TIME 1
+
+/* Version number of package */
+#define _EVENT_VERSION "1.3.99-trunk"
+
+/* Define to an appropriate substitute if the compiler doesn't have __func__ */
+/* #undef _EVENT___func__ */
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef _EVENT_const */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+ calls it, or to nothing if 'inline' is not supported under any name. */
+#ifndef _EVENT___cplusplus
+#define _EVENT_inline __inline
+#endif
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef _EVENT_pid_t */
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+/* #undef _EVENT_size_t */
+
+/* Define to unsigned int if you dont have it */
+#define _EVENT_socklen_t unsigned int
+
+/* Define to `unsigned short' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint16_t */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint32_t */
+
+/* Define to `unsigned long long' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint64_t */
+
+/* Define to `unsigned char' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint8_t */
+#endif
diff --git a/libevent/WIN32-Code/misc.c b/libevent/WIN32-Code/misc.c
new file mode 100644
index 00000000000..371e192beae
--- /dev/null
+++ b/libevent/WIN32-Code/misc.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <sys/timeb.h>
+#include <time.h>
+
+#ifdef __GNUC__
+/*our prototypes for timeval and timezone are in here, just in case the above
+ headers don't have them*/
+#include "misc.h"
+#endif
+
+/****************************************************************************
+ *
+ * Function: gettimeofday(struct timeval *, struct timezone *)
+ *
+ * Purpose: Get current time of day.
+ *
+ * Arguments: tv => Place to store the curent time of day.
+ * tz => Ignored.
+ *
+ * Returns: 0 => Success.
+ *
+ ****************************************************************************/
+
+#ifndef HAVE_GETTIMEOFDAY
+/* Minimal Win32 substitute for gettimeofday(), built on _ftime().
+ * Resolution is milliseconds only; the timezone argument is accepted
+ * but ignored.  Returns 0 on success, -1 if tv is NULL. */
+int gettimeofday(struct timeval *tv, struct timezone *tz) {
+	struct _timeb tb;
+
+	if(tv == NULL)
+		return -1;
+
+	/* _ftime() yields seconds plus milliseconds; convert ms -> us. */
+	_ftime(&tb);
+	tv->tv_sec = (long) tb.time;
+	tv->tv_usec = ((int) tb.millitm) * 1000;
+	return 0;
+}
+#endif
+
+#if 0
+/* NOTE(review): this whole section is compiled out (#if 0) and kept for
+ * reference only.  win_read()/win_write() wrap ReadFile/WriteFile on a
+ * HANDLE smuggled through an int, and socketpair() emulates a socket
+ * pair with a duplex named pipe.  Casting HANDLE to int truncates on
+ * 64-bit Windows (LLP64) -- revisit before ever enabling this code. */
+int
+win_read(int fd, void *buf, unsigned int length)
+{
+	DWORD dwBytesRead;
+	int res = ReadFile((HANDLE) fd, buf, length, &dwBytesRead, NULL);
+	if (res == 0) {
+		DWORD error = GetLastError();
+		/* ERROR_NO_DATA on a non-blocking pipe means "nothing yet". */
+		if (error == ERROR_NO_DATA)
+			return (0);
+		return (-1);
+	} else
+		return (dwBytesRead);
+}
+
+int
+win_write(int fd, void *buf, unsigned int length)
+{
+	DWORD dwBytesWritten;
+	int res = WriteFile((HANDLE) fd, buf, length, &dwBytesWritten, NULL);
+	if (res == 0) {
+		DWORD error = GetLastError();
+		if (error == ERROR_NO_DATA)
+			return (0);
+		return (-1);
+	} else
+		return (dwBytesWritten);
+}
+
+/* Emulates socketpair(): returns the two ends of a duplex, non-blocking
+ * named pipe in sv[0]/sv[1] (HANDLEs cast to int).  d/type/protocol are
+ * ignored.  Returns 0 on success, -1 on failure. */
+int
+socketpair(int d, int type, int protocol, int *sv)
+{
+	static int count;
+	char buf[64];
+	HANDLE fd;
+	DWORD dwMode;
+	sprintf(buf, "\\\\.\\pipe\\levent-%d", count++);
+	/* Create a duplex pipe which will behave like a socket pair */
+	fd = CreateNamedPipe(buf, PIPE_ACCESS_DUPLEX, PIPE_TYPE_BYTE | PIPE_NOWAIT,
+	    PIPE_UNLIMITED_INSTANCES, 4096, 4096, 0, NULL);
+	if (fd == INVALID_HANDLE_VALUE)
+		return (-1);
+	sv[0] = (int)fd;
+
+	fd = CreateFile(buf, GENERIC_READ|GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (fd == INVALID_HANDLE_VALUE)
+		return (-1);
+	dwMode = PIPE_NOWAIT;
+	SetNamedPipeHandleState(fd, &dwMode, NULL, NULL);
+	sv[1] = (int)fd;
+
+	return (0);
+}
+#endif
diff --git a/libevent/WIN32-Code/misc.h b/libevent/WIN32-Code/misc.h
new file mode 100644
index 00000000000..aced574687c
--- /dev/null
+++ b/libevent/WIN32-Code/misc.h
@@ -0,0 +1,11 @@
+#ifndef MISC_H
+#define MISC_H
+
+/* Forward declarations only; callers include the real definitions from
+ * the system headers (see misc.c, which includes this under __GNUC__
+ * in case those headers lack the prototypes). */
+struct timezone;
+struct timeval;
+
+/* Win32 replacement implemented in misc.c; declared only when the
+ * platform does not provide a native gettimeofday(). */
+#ifndef HAVE_GETTIMEOFDAY
+int gettimeofday(struct timeval *,struct timezone *);
+#endif
+
+#endif
diff --git a/libevent/WIN32-Code/tree.h b/libevent/WIN32-Code/tree.h
new file mode 100644
index 00000000000..79e8d91f0eb
--- /dev/null
+++ b/libevent/WIN32-Code/tree.h
@@ -0,0 +1,1354 @@
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+/*
+ * Splay tree declaration/access macros.  Nodes embed their links via
+ * SPLAY_ENTRY (intrusive); every operation splays the accessed node
+ * toward the root.
+ */
+#define SPLAY_HEAD(name, type) \
+struct name { \
+	struct type *sph_root; /* root of the tree */ \
+}
+
+#define SPLAY_INITIALIZER(root) \
+	{ NULL }
+
+#define SPLAY_INIT(root) do { \
+	(root)->sph_root = NULL; \
+} while (0)
+
+#define SPLAY_ENTRY(type) \
+struct { \
+	struct type *spe_left; /* left element */ \
+	struct type *spe_right; /* right element */ \
+}
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+
+/* Top-down splay helpers: descend one step, stitching the bypassed
+ * subtree onto the growing right (LINKLEFT) / left (LINKRIGHT) tree. */
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (0)
+
+/* Reattach the accumulated left/right trees under the new root. */
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+	if (SPLAY_EMPTY(head)) \
+		return(NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) \
+		return (head->sph_root); \
+	return (NULL); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+	name##_SPLAY(head, elm); \
+	if (SPLAY_RIGHT(elm, field) != NULL) { \
+		elm = SPLAY_RIGHT(elm, field); \
+		while (SPLAY_LEFT(elm, field) != NULL) { \
+			elm = SPLAY_LEFT(elm, field); \
+		} \
+	} else \
+		elm = NULL; \
+	return (elm); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+	name##_SPLAY_MINMAX(head, val); \
+	return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ \
+	if (SPLAY_EMPTY(head)) { \
+		SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+	} else { \
+		int __comp; \
+		name##_SPLAY(head, elm); \
+		__comp = (cmp)(elm, (head)->sph_root); \
+		if(__comp < 0) { \
+			SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+			SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+			SPLAY_LEFT((head)->sph_root, field) = NULL; \
+		} else if (__comp > 0) { \
+			SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+			SPLAY_LEFT(elm, field) = (head)->sph_root; \
+			SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+		} else \
+			return ((head)->sph_root); \
+	} \
+	(head)->sph_root = (elm); \
+	return (NULL); \
+} \
+ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *__tmp; \
+	if (SPLAY_EMPTY(head)) \
+		return (NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) { \
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm); \
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+		} \
+		return (elm); \
+	} \
+	return (NULL); \
+} \
+ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+	int __comp; \
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root))) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) > 0){ \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+\
+	while (1) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp > 0) { \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+
+/* Convenience wrappers; "name" must match the SPLAY_GENERATE name. */
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) \
+	for ((x) = SPLAY_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+	struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+	{ NULL }
+
+#define RB_INIT(root) do { \
+	(root)->rbh_root = NULL; \
+} while (0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+/* Intrusive node links plus the node's color. */
+#define RB_ENTRY(type) \
+struct { \
+	struct type *rbe_left; /* left element */ \
+	struct type *rbe_right; /* right element */ \
+	struct type *rbe_parent; /* parent element */ \
+	int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+/* Initialize elm as a red leaf under parent. */
+#define RB_SET(elm, parent, field) do { \
+	RB_PARENT(elm, field) = parent; \
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+	RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do { \
+	RB_COLOR(black, field) = RB_BLACK; \
+	RB_COLOR(red, field) = RB_RED; \
+} while (0)
+
+/* User hook for augmented trees; default is a no-op. */
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+	(tmp) = RB_RIGHT(elm, field); \
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_LEFT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+	if ((RB_PARENT(tmp, field))) \
+		RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+	(tmp) = RB_LEFT(elm, field); \
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_RIGHT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+	if ((RB_PARENT(tmp, field))) \
+		RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *); \
+struct type *name##_RB_INSERT(struct name *, struct type *); \
+struct type *name##_RB_FIND(struct name *, struct type *); \
+struct type *name##_RB_NEXT(struct type *); \
+struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+	struct type *parent, *gparent, *tmp; \
+	while ((parent = RB_PARENT(elm, field)) && \
+	    RB_COLOR(parent, field) == RB_RED) { \
+		gparent = RB_PARENT(parent, field); \
+		if (parent == RB_LEFT(gparent, field)) { \
+			tmp = RB_RIGHT(gparent, field); \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_RIGHT(parent, field) == elm) { \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+		} else { \
+			tmp = RB_LEFT(gparent, field); \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_LEFT(parent, field) == elm) { \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_LEFT(head, gparent, tmp, field); \
+		} \
+	} \
+	RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+	struct type *tmp; \
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+	    elm != RB_ROOT(head)) { \
+		if (RB_LEFT(parent, field) == elm) { \
+			tmp = RB_RIGHT(parent, field); \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_RIGHT(tmp, field) == NULL || \
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft; \
+					if ((oleft = RB_LEFT(tmp, field)))\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_RIGHT(tmp, field)) \
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); \
+				break; \
+			} \
+		} else { \
+			tmp = RB_LEFT(parent, field); \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_LEFT(tmp, field) == NULL || \
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright; \
+					if ((oright = RB_RIGHT(tmp, field)))\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_LEFT(tmp, field)) \
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); \
+				break; \
+			} \
+		} \
+	} \
+	if (elm) \
+		RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *child, *parent, *old = elm; \
+	int color; \
+	if (RB_LEFT(elm, field) == NULL) \
+		child = RB_RIGHT(elm, field); \
+	else if (RB_RIGHT(elm, field) == NULL) \
+		child = RB_LEFT(elm, field); \
+	else { \
+		struct type *left; \
+		elm = RB_RIGHT(elm, field); \
+		while ((left = RB_LEFT(elm, field))) \
+			elm = left; \
+		child = RB_RIGHT(elm, field); \
+		parent = RB_PARENT(elm, field); \
+		color = RB_COLOR(elm, field); \
+		if (child) \
+			RB_PARENT(child, field) = parent; \
+		if (parent) { \
+			if (RB_LEFT(parent, field) == elm) \
+				RB_LEFT(parent, field) = child; \
+			else \
+				RB_RIGHT(parent, field) = child; \
+			RB_AUGMENT(parent); \
+		} else \
+			RB_ROOT(head) = child; \
+		if (RB_PARENT(elm, field) == old) \
+			parent = elm; \
+		(elm)->field = (old)->field; \
+		if (RB_PARENT(old, field)) { \
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else \
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field)); \
+		} else \
+			RB_ROOT(head) = elm; \
+		RB_PARENT(RB_LEFT(old, field), field) = elm; \
+		if (RB_RIGHT(old, field)) \
+			RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+		if (parent) { \
+			left = parent; \
+			do { \
+				RB_AUGMENT(left); \
+			} while ((left = RB_PARENT(left, field))); \
+		} \
+		goto color; \
+	} \
+	parent = RB_PARENT(elm, field); \
+	color = RB_COLOR(elm, field); \
+	if (child) \
+		RB_PARENT(child, field) = parent; \
+	if (parent) { \
+		if (RB_LEFT(parent, field) == elm) \
+			RB_LEFT(parent, field) = child; \
+		else \
+			RB_RIGHT(parent, field) = child; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = child; \
+color: \
+	if (color == RB_BLACK) \
+		name##_RB_REMOVE_COLOR(head, parent, child); \
+	return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp; \
+	struct type *parent = NULL; \
+	int comp = 0; \
+	tmp = RB_ROOT(head); \
+	while (tmp) { \
+		parent = tmp; \
+		comp = (cmp)(elm, parent); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	RB_SET(elm, parent, field); \
+	if (parent != NULL) { \
+		if (comp < 0) \
+			RB_LEFT(parent, field) = elm; \
+		else \
+			RB_RIGHT(parent, field) = elm; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = elm; \
+	name##_RB_INSERT_COLOR(head, elm); \
+	return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	int comp; \
+	while (tmp) { \
+		comp = cmp(elm, tmp); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	return (NULL); \
+} \
+ \
+struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+	if (RB_RIGHT(elm, field)) { \
+		elm = RB_RIGHT(elm, field); \
+		while (RB_LEFT(elm, field)) \
+			elm = RB_LEFT(elm, field); \
+	} else { \
+		if (RB_PARENT(elm, field) && \
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+			elm = RB_PARENT(elm, field); \
+		else { \
+			while (RB_PARENT(elm, field) && \
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field); \
+			elm = RB_PARENT(elm, field); \
+		} \
+	} \
+	return (elm); \
+} \
+ \
+struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	struct type *parent = NULL; \
+	while (tmp) { \
+		parent = tmp; \
+		if (val < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else \
+			tmp = RB_RIGHT(tmp, field); \
+	} \
+	return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+/* Convenience wrappers; "name" must match the RB_GENERATE name. */
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+/* In-order traversal.  The loop increment reads x's links, so the body
+ * must not remove or free x itself. */
+#define RB_FOREACH(x, name, head) \
+	for ((x) = RB_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = name##_RB_NEXT(x))
+
+#endif /* _SYS_TREE_H_ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) \
+struct name { \
+ struct type *sph_root; /* root of the tree */ \
+}
+
+#define SPLAY_INITIALIZER(root) \
+ { NULL }
+
+#define SPLAY_INIT(root) do { \
+ (root)->sph_root = NULL; \
+} while (0)
+
+#define SPLAY_ENTRY(type) \
+struct { \
+ struct type *spe_left; /* left element */ \
+ struct type *spe_right; /* right element */ \
+}
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) \
+ return(NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) \
+ return (head->sph_root); \
+ return (NULL); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+ name##_SPLAY(head, elm); \
+ if (SPLAY_RIGHT(elm, field) != NULL) { \
+ elm = SPLAY_RIGHT(elm, field); \
+ while (SPLAY_LEFT(elm, field) != NULL) { \
+ elm = SPLAY_LEFT(elm, field); \
+ } \
+ } else \
+ elm = NULL; \
+ return (elm); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+ name##_SPLAY_MINMAX(head, val); \
+ return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) { \
+ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+ } else { \
+ int __comp; \
+ name##_SPLAY(head, elm); \
+ __comp = (cmp)(elm, (head)->sph_root); \
+ if(__comp < 0) { \
+ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+ SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+ SPLAY_LEFT((head)->sph_root, field) = NULL; \
+ } else if (__comp > 0) { \
+ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT(elm, field) = (head)->sph_root; \
+ SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+ } else \
+ return ((head)->sph_root); \
+ } \
+ (head)->sph_root = (elm); \
+ return (NULL); \
+} \
+ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *__tmp; \
+ if (SPLAY_EMPTY(head)) \
+ return (NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) { \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+ } else { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+ name##_SPLAY(head, elm); \
+ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+ } \
+ return (elm); \
+ } \
+ return (NULL); \
+} \
+ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+ int __comp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while ((__comp = (cmp)(elm, (head)->sph_root))) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) > 0){ \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while (1) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp > 0) { \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) \
+ for ((x) = SPLAY_MIN(name, head); \
+ (x) != NULL; \
+ (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+ struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+ { NULL }
+
+#define RB_INIT(root) do { \
+ (root)->rbh_root = NULL; \
+} while (0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+ struct type *rbe_left; /* left element */ \
+ struct type *rbe_right; /* right element */ \
+ struct type *rbe_parent; /* parent element */ \
+ int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do { \
+ RB_PARENT(elm, field) = parent; \
+ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+ RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do { \
+ RB_COLOR(black, field) = RB_BLACK; \
+ RB_COLOR(red, field) = RB_RED; \
+} while (0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+ (tmp) = RB_RIGHT(elm, field); \
+ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_LEFT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+ (tmp) = RB_LEFT(elm, field); \
+ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_RIGHT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *); \
+struct type *name##_RB_INSERT(struct name *, struct type *); \
+struct type *name##_RB_FIND(struct name *, struct type *); \
+struct type *name##_RB_NEXT(struct type *); \
+struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+ struct type *parent, *gparent, *tmp; \
+ while ((parent = RB_PARENT(elm, field)) && \
+ RB_COLOR(parent, field) == RB_RED) { \
+ gparent = RB_PARENT(parent, field); \
+ if (parent == RB_LEFT(gparent, field)) { \
+ tmp = RB_RIGHT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_RIGHT(parent, field) == elm) { \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+ } else { \
+ tmp = RB_LEFT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_LEFT(parent, field) == elm) { \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_LEFT(head, gparent, tmp, field); \
+ } \
+ } \
+ RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+ struct type *tmp; \
+ while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+ elm != RB_ROOT(head)) { \
+ if (RB_LEFT(parent, field) == elm) { \
+ tmp = RB_RIGHT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+ struct type *oleft; \
+ if ((oleft = RB_LEFT(tmp, field)))\
+ RB_COLOR(oleft, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_RIGHT(tmp, field)) \
+ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } else { \
+ tmp = RB_LEFT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+ struct type *oright; \
+ if ((oright = RB_RIGHT(tmp, field)))\
+ RB_COLOR(oright, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_LEFT(head, tmp, oright, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_LEFT(tmp, field)) \
+ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } \
+ } \
+ if (elm) \
+ RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *child, *parent, *old = elm; \
+ int color; \
+ if (RB_LEFT(elm, field) == NULL) \
+ child = RB_RIGHT(elm, field); \
+ else if (RB_RIGHT(elm, field) == NULL) \
+ child = RB_LEFT(elm, field); \
+ else { \
+ struct type *left; \
+ elm = RB_RIGHT(elm, field); \
+ while ((left = RB_LEFT(elm, field))) \
+ elm = left; \
+ child = RB_RIGHT(elm, field); \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+ if (RB_PARENT(elm, field) == old) \
+ parent = elm; \
+ (elm)->field = (old)->field; \
+ if (RB_PARENT(old, field)) { \
+ if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+ RB_LEFT(RB_PARENT(old, field), field) = elm;\
+ else \
+ RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+ RB_AUGMENT(RB_PARENT(old, field)); \
+ } else \
+ RB_ROOT(head) = elm; \
+ RB_PARENT(RB_LEFT(old, field), field) = elm; \
+ if (RB_RIGHT(old, field)) \
+ RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+ if (parent) { \
+ left = parent; \
+ do { \
+ RB_AUGMENT(left); \
+ } while ((left = RB_PARENT(left, field))); \
+ } \
+ goto color; \
+ } \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+color: \
+ if (color == RB_BLACK) \
+ name##_RB_REMOVE_COLOR(head, parent, child); \
+ return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp; \
+ struct type *parent = NULL; \
+ int comp = 0; \
+ tmp = RB_ROOT(head); \
+ while (tmp) { \
+ parent = tmp; \
+ comp = (cmp)(elm, parent); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ RB_SET(elm, parent, field); \
+ if (parent != NULL) { \
+ if (comp < 0) \
+ RB_LEFT(parent, field) = elm; \
+ else \
+ RB_RIGHT(parent, field) = elm; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = elm; \
+ name##_RB_INSERT_COLOR(head, elm); \
+ return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (NULL); \
+} \
+ \
+struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+ if (RB_RIGHT(elm, field)) { \
+ elm = RB_RIGHT(elm, field); \
+ while (RB_LEFT(elm, field)) \
+ elm = RB_LEFT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && \
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *parent = NULL; \
+ while (tmp) { \
+ parent = tmp; \
+ if (val < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else \
+ tmp = RB_RIGHT(tmp, field); \
+ } \
+ return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head) \
+ for ((x) = RB_MIN(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_NEXT(x))
+
+#endif /* _SYS_TREE_H_ */
diff --git a/libevent/WIN32-Code/win32.c b/libevent/WIN32-Code/win32.c
new file mode 100644
index 00000000000..8a603b7eceb
--- /dev/null
+++ b/libevent/WIN32-Code/win32.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * Copyright 2003 Michael A. Davis <mike@datanerds.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef _MSC_VER
+#include "./config.h"
+#else
+/* Avoid the windows/msvc thing. */
+#include "../config.h"
+#endif
+
+#include <winsock2.h>
+#include <windows.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#define RB_AUGMENT(x) (void)(x)
+#include "./tree.h"
+#include "log.h"
+#include "event.h"
+#include "event-internal.h"
+
+#define XFREE(ptr) do { if (ptr) free(ptr); } while(0)
+
+extern struct event_list timequeue;
+extern struct event_list addqueue;
+#if 0
+extern struct event_list signalqueue;
+#endif
+
+struct win_fd_set {
+ u_int fd_count;
+ SOCKET fd_array[1];
+};
+
+int evsigcaught[NSIG];
+volatile sig_atomic_t signal_caught = 0;
+/* MSDN says this is required to handle SIGFPE */
+volatile double SIGFPE_REQ = 0.0f;
+
+#if 0
+static void signal_handler(int sig);
+
+void signal_process(void);
+int signal_recalc(void);
+#endif
+
+struct event_entry {
+ RB_ENTRY(event_entry) node;
+ SOCKET sock;
+ int read_pos;
+ int write_pos;
+ struct event *read_event;
+ struct event *write_event;
+};
+
+/* Order event entries by socket handle, for the red-black tree. */
+static int
+compare(struct event_entry *a, struct event_entry *b)
+{
+	if (a->sock != b->sock)
+		return (a->sock < b->sock) ? -1 : 1;
+	return 0;
+}
+
+struct win32op {
+ int fd_setsz;
+ struct win_fd_set *readset_in;
+ struct win_fd_set *writeset_in;
+ struct win_fd_set *readset_out;
+ struct win_fd_set *writeset_out;
+ struct win_fd_set *exset_out;
+ RB_HEAD(event_map, event_entry) event_root;
+
+ unsigned signals_are_broken : 1;
+};
+
+RB_PROTOTYPE(event_map, event_entry, node, compare);
+RB_GENERATE(event_map, event_entry, node, compare);
+
+void *win32_init (struct event_base *);
+int win32_insert (void *, struct event *);
+int win32_del (void *, struct event *);
+int win32_dispatch (struct event_base *base, void *, struct timeval *);
+void win32_dealloc (struct event_base *, void *);
+
+struct eventop win32ops = {
+ "win32",
+ win32_init,
+ win32_insert,
+ win32_del,
+ win32_dispatch,
+ win32_dealloc,
+ 0
+};
+
+#define FD_SET_ALLOC_SIZE(n) ((sizeof(struct win_fd_set) + ((n)-1)*sizeof(SOCKET)))
+
+/* Grow all five fd sets to hold new_size sockets.  Returns 0 on
+ * success, -1 if any allocation fails; already-grown sets remain
+ * valid and owned by op in either case. */
+static int
+realloc_fd_sets(struct win32op *op, size_t new_size)
+{
+	size_t size;
+	struct win_fd_set *tmp;
+
+	assert(new_size >= op->readset_in->fd_count &&
+	    new_size >= op->writeset_in->fd_count);
+	assert(new_size >= 1);
+
+	size = FD_SET_ALLOC_SIZE(new_size);
+	/* BUG FIX: assigning realloc()'s result straight back to the
+	 * member leaked the old block when realloc failed; go through a
+	 * temporary so the original pointer survives failure. */
+	if (!(tmp = realloc(op->readset_in, size)))
+		return (-1);
+	op->readset_in = tmp;
+	if (!(tmp = realloc(op->writeset_in, size)))
+		return (-1);
+	op->writeset_in = tmp;
+	if (!(tmp = realloc(op->readset_out, size)))
+		return (-1);
+	op->readset_out = tmp;
+	if (!(tmp = realloc(op->exset_out, size)))
+		return (-1);
+	op->exset_out = tmp;
+	if (!(tmp = realloc(op->writeset_out, size)))
+		return (-1);
+	op->writeset_out = tmp;
+	op->fd_setsz = new_size;
+	return (0);
+}
+
+/* Convert a timeval to whole milliseconds (fraction truncated). */
+static int
+timeval_to_ms(struct timeval *tv)
+{
+	int ms = tv->tv_sec * 1000;
+	ms += tv->tv_usec / 1000;
+	return (ms);
+}
+
+/* Look up the event_entry for socket s in op's tree.  If absent and
+ * create is nonzero, allocate, initialize and insert a fresh entry.
+ * Returns NULL on a lookup miss (create == 0) or allocation failure. */
+static struct event_entry*
+get_event_entry(struct win32op *op, SOCKET s, int create)
+{
+	struct event_entry key, *val;
+	key.sock = s;
+	val = RB_FIND(event_map, &op->event_root, &key);
+	if (val || !create)
+		return val;
+	if (!(val = calloc(1, sizeof(struct event_entry)))) {
+		event_warn("%s: calloc", __func__);
+		return NULL;
+	}
+	val->sock = s;
+	/* -1 marks "not present" in the corresponding input fd set. */
+	val->read_pos = val->write_pos = -1;
+	RB_INSERT(event_map, &op->event_root, val);
+	return val;
+}
+
+/* Append ent's socket to the read or write input fd set, growing the
+ * sets if they are full, and cache the socket's index in ent so
+ * do_fd_clear can remove it in O(1).  Returns the slot used, 0 if the
+ * socket was already present, or -1 on allocation failure. */
+static int
+do_fd_set(struct win32op *op, struct event_entry *ent, int read)
+{
+	SOCKET s = ent->sock;
+	struct win_fd_set *set = read ? op->readset_in : op->writeset_in;
+	if (read) {
+		if (ent->read_pos >= 0)
+			return (0);
+	} else {
+		if (ent->write_pos >= 0)
+			return (0);
+	}
+	if (set->fd_count == op->fd_setsz) {
+		if (realloc_fd_sets(op, op->fd_setsz*2))
+			return (-1);
+		/* set pointer will have changed and needs reiniting! */
+		set = read ? op->readset_in : op->writeset_in;
+	}
+	set->fd_array[set->fd_count] = s;
+	if (read)
+		ent->read_pos = set->fd_count;
+	else
+		ent->write_pos = set->fd_count;
+	return (set->fd_count++);
+}
+
+/* Remove ent's socket from the read or write input fd set.  The array
+ * is kept dense by moving the last socket into the freed slot and
+ * updating that socket's cached position.  Returns 0. */
+static int
+do_fd_clear(struct win32op *op, struct event_entry *ent, int read)
+{
+	int i;
+	struct win_fd_set *set = read ? op->readset_in : op->writeset_in;
+	if (read) {
+		i = ent->read_pos;
+		ent->read_pos = -1;
+	} else {
+		i = ent->write_pos;
+		ent->write_pos = -1;
+	}
+	if (i < 0)
+		return (0);
+	if (--set->fd_count != i) {
+		struct event_entry *ent2;
+		SOCKET s2;
+		s2 = set->fd_array[i] = set->fd_array[set->fd_count];
+		ent2 = get_event_entry(op, s2, 0);
+		/* BUG FIX: the lookup result is ent2; the old code tested
+		 * ent (the entry being cleared, always non-NULL here), so a
+		 * failed lookup fell through to a NULL dereference below. */
+		if (!ent2) /* This indicates a bug. */
+			return (0);
+		if (read)
+			ent2->read_pos = i;
+		else
+			ent2->write_pos = i;
+	}
+	return (0);
+}
+
+#define NEVENT 64
+/* Allocate and initialize the win32 backend state: five fd sets sized
+ * for NEVENT sockets, an empty socket-to-event tree, and the signal
+ * subsystem.  Returns the opaque backend pointer, or NULL on
+ * allocation failure. */
+void *
+win32_init(struct event_base *_base)
+{
+	struct win32op *winop;
+	size_t size;
+	if (!(winop = calloc(1, sizeof(struct win32op))))
+		return NULL;
+	winop->fd_setsz = NEVENT;
+	size = FD_SET_ALLOC_SIZE(NEVENT);
+	if (!(winop->readset_in = malloc(size)))
+		goto err;
+	if (!(winop->writeset_in = malloc(size)))
+		goto err;
+	if (!(winop->readset_out = malloc(size)))
+		goto err;
+	if (!(winop->writeset_out = malloc(size)))
+		goto err;
+	if (!(winop->exset_out = malloc(size)))
+		goto err;
+	RB_INIT(&winop->event_root);
+	winop->readset_in->fd_count = winop->writeset_in->fd_count = 0;
+	winop->readset_out->fd_count = winop->writeset_out->fd_count
+		= winop->exset_out->fd_count = 0;
+
+	/* Remember a failed signal init so win32_insert can refuse
+	 * EV_SIGNAL events later. */
+	if (evsignal_init(_base) < 0)
+		winop->signals_are_broken = 1;
+
+	return (winop);
+ err:
+	/* goto-based cleanup; XFREE skips NULL members from calloc. */
+	XFREE(winop->readset_in);
+	XFREE(winop->writeset_in);
+	XFREE(winop->readset_out);
+	XFREE(winop->writeset_out);
+	XFREE(winop->exset_out);
+	XFREE(winop);
+	return (NULL);
+}
+
+/* Register event ev with the backend.  Signal events are delegated to
+ * evsignal_add; EV_READ/EV_WRITE events get (or create) the socket's
+ * event_entry and are added to the matching fd set.  Returns 0 on
+ * success, -1 on error. */
+int
+win32_insert(void *op, struct event *ev)
+{
+	struct win32op *win32op = op;
+	struct event_entry *ent;
+
+	if (ev->ev_events & EV_SIGNAL) {
+		if (win32op->signals_are_broken)
+			return (-1);
+		return (evsignal_add(ev));
+	}
+	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
+		return (0);
+	ent = get_event_entry(win32op, ev->ev_fd, 1);
+	if (!ent)
+		return (-1); /* out of memory */
+
+	event_debug(("%s: adding event for %d", __func__, (int)ev->ev_fd));
+	if (ev->ev_events & EV_READ) {
+		if (do_fd_set(win32op, ent, 1)<0)
+			return (-1);
+		ent->read_event = ev;
+	}
+	if (ev->ev_events & EV_WRITE) {
+		if (do_fd_set(win32op, ent, 0)<0)
+			return (-1);
+		ent->write_event = ev;
+	}
+	return (0);
+}
+
+/* Unregister event ev.  Signal events go through evsignal_del; socket
+ * events are cleared from their fd set, and the event_entry is freed
+ * once neither a read nor a write event remains attached to it.
+ * Returns 0 on success, -1 if the socket has no entry. */
+int
+win32_del(void *op, struct event *ev)
+{
+	struct win32op *win32op = op;
+	struct event_entry *ent;
+
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_del(ev));
+
+	if (!(ent = get_event_entry(win32op, ev->ev_fd, 0)))
+		return (-1);
+	event_debug(("%s: Removing event for %d", __func__, ev->ev_fd));
+	if (ev == ent->read_event) {
+		do_fd_clear(win32op, ent, 1);
+		ent->read_event = NULL;
+	}
+	if (ev == ent->write_event) {
+		do_fd_clear(win32op, ent, 0);
+		ent->write_event = NULL;
+	}
+	/* Drop the tree node once no event references this socket. */
+	if (!ent->read_event && !ent->write_event) {
+		RB_REMOVE(event_map, &win32op->event_root, ent);
+		free(ent);
+	}
+
+	return 0;
+}
+
+/* Duplicate the populated prefix of one win_fd_set into another. */
+static void
+fd_set_copy(struct win_fd_set *out, const struct win_fd_set *in)
+{
+	size_t nbytes = in->fd_count * sizeof(SOCKET);
+	out->fd_count = in->fd_count;
+	memcpy(out->fd_array, in->fd_array, nbytes);
+}
+
+/*
+ static void dump_fd_set(struct win_fd_set *s)
+ {
+ unsigned int i;
+ printf("[ ");
+ for(i=0;i<s->fd_count;++i)
+ printf("%d ",(int)s->fd_array[i]);
+ printf("]\n");
+ }
+*/
+
+/* Run one select() pass: wait up to tv on all registered sockets, then
+ * activate every ready read/write event and process pending signals.
+ * Returns 0 on success, or select()'s negative result on error. */
+int
+win32_dispatch(struct event_base *base, void *op,
+	       struct timeval *tv)
+{
+	struct win32op *win32op = op;
+	int res = 0;
+	unsigned j, i;
+	int fd_count;
+	SOCKET s;
+	struct event_entry *ent;
+
+	/* select() mutates its fd_set arguments, so operate on scratch
+	 * copies.  The read set doubles as the except set, since Windows
+	 * reports some readiness conditions there. */
+	fd_set_copy(win32op->readset_out, win32op->readset_in);
+	fd_set_copy(win32op->exset_out, win32op->readset_in);
+	fd_set_copy(win32op->writeset_out, win32op->writeset_in);
+
+	fd_count =
+	    (win32op->readset_out->fd_count > win32op->writeset_out->fd_count) ?
+	    win32op->readset_out->fd_count : win32op->writeset_out->fd_count;
+
+	if (!fd_count) {
+		/* Windows doesn't like you to call select() with no sockets */
+		Sleep(timeval_to_ms(tv));
+		evsignal_process(base);
+		return (0);
+	}
+
+	res = select(fd_count,
+		     (struct fd_set*)win32op->readset_out,
+		     (struct fd_set*)win32op->writeset_out,
+		     (struct fd_set*)win32op->exset_out, tv);
+
+	event_debug(("%s: select returned %d", __func__, res));
+
+	if (res <= 0) {
+		evsignal_process(base);
+		return res;
+	} else if (base->sig.evsignal_caught) {
+		evsignal_process(base);
+	}
+
+	/* Activate ready events, starting each scan at a random offset so
+	 * no single socket is persistently favored. */
+	if (win32op->readset_out->fd_count) {
+		i = rand() % win32op->readset_out->fd_count;
+		for (j=0; j<win32op->readset_out->fd_count; ++j) {
+			if (++i >= win32op->readset_out->fd_count)
+				i = 0;
+			s = win32op->readset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->read_event)
+				event_active(ent->read_event, EV_READ, 1);
+		}
+	}
+	if (win32op->exset_out->fd_count) {
+		i = rand() % win32op->exset_out->fd_count;
+		for (j=0; j<win32op->exset_out->fd_count; ++j) {
+			if (++i >= win32op->exset_out->fd_count)
+				i = 0;
+			s = win32op->exset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->read_event)
+				event_active(ent->read_event, EV_READ, 1);
+		}
+	}
+	if (win32op->writeset_out->fd_count) {
+		i = rand() % win32op->writeset_out->fd_count;
+		for (j=0; j<win32op->writeset_out->fd_count; ++j) {
+			/* BUG FIX: wrap i against writeset_out, not exset_out;
+			 * the old bound could index past the end of the write
+			 * set (or never wrap) when the set sizes differ. */
+			if (++i >= win32op->writeset_out->fd_count)
+				i = 0;
+			s = win32op->writeset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->write_event)
+				event_active(ent->write_event, EV_WRITE, 1);
+
+		}
+	}
+
+	return (0);
+}
+
+/* Tear down the backend: release the signal subsystem and every fd
+ * set, then scrub and free the state struct itself. */
+void
+win32_dealloc(struct event_base *_base, void *arg)
+{
+	struct win32op *win32op = arg;
+
+	evsignal_dealloc(_base);
+	/* free(NULL) is a no-op, so the members need no guards. */
+	free(win32op->readset_in);
+	free(win32op->writeset_in);
+	free(win32op->readset_out);
+	free(win32op->writeset_out);
+	free(win32op->exset_out);
+	/* XXXXX free the tree. */
+
+	/* BUG FIX: sizeof(win32op) is the size of the pointer, not of the
+	 * structure; zero the whole struct (now-dangling pointers and all)
+	 * before freeing. */
+	memset(win32op, 0, sizeof(*win32op));
+	free(win32op);
+}
+
+#if 0
+/* (Disabled by #if 0.)  Async signal handler: count the signal and
+ * raise the global flag for later processing by signal_process(). */
+static void
+signal_handler(int sig)
+{
+	evsigcaught[sig]++;
+	signal_caught = 1;
+}
+
+/* (Disabled by #if 0.)  Re-register signal_handler for every queued
+ * signal event.  Returns 0 on success, -1 if any signal() call fails. */
+int
+signal_recalc(void)
+{
+	struct event *ev;
+
+	/* Reinstall our signal handler. */
+	TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) {
+		/* BUG FIX: signal() returns a function pointer; casting it
+		 * to int truncates on 64-bit targets.  Compare against
+		 * SIG_ERR instead. */
+		if (signal(EVENT_SIGNAL(ev), signal_handler) == SIG_ERR)
+			return (-1);
+	}
+	return (0);
+}
+
+/* (Disabled by #if 0.)  Deliver all caught signals: activate each
+ * queued signal event with its pending call count — removing
+ * non-persistent events first — then reset the counters and flag. */
+void
+signal_process(void)
+{
+	struct event *ev;
+	short ncalls;
+
+	TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) {
+		ncalls = evsigcaught[EVENT_SIGNAL(ev)];
+		if (ncalls) {
+			if (!(ev->ev_events & EV_PERSIST))
+				event_del(ev);
+			event_active(ev, EV_SIGNAL, ncalls);
+		}
+	}
+
+	memset(evsigcaught, 0, sizeof(evsigcaught));
+	signal_caught = 0;
+}
+#endif
+
diff --git a/libevent/WIN32-Prj/libevent.dsw b/libevent/WIN32-Prj/libevent.dsw
new file mode 100644
index 00000000000..fb05451ca25
--- /dev/null
+++ b/libevent/WIN32-Prj/libevent.dsw
@@ -0,0 +1,74 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "event_test"=".\event_test\event_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "libevent"=".\libevent.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "signal_test"=".\signal_test\signal_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "time_test"=".\time_test\time_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/libevent/WIN32-Prj/libevent.sln b/libevent/WIN32-Prj/libevent.sln
new file mode 100644
index 00000000000..17e0c98bae6
--- /dev/null
+++ b/libevent/WIN32-Prj/libevent.sln
@@ -0,0 +1,53 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "event_test", "event_test\event_test.vcproj", "{52099A8B-455B-4BE9-8E61-A3D6E8A4338D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libevent", "libevent.vcproj", "{B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "signal_test", "signal_test\signal_test.vcproj", "{768DB9DD-2694-4274-89B8-74106E8F7786}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "time_test", "time_test\time_test.vcproj", "{D4BE29FB-E45C-4177-9647-74BBAFDC1257}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "regress", "regress\regress.vcproj", "{F7C26008-6066-4AD3-8543-452EFE58BD2E}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Debug|Win32.ActiveCfg = Debug|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Debug|Win32.Build.0 = Debug|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Release|Win32.ActiveCfg = Release|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Release|Win32.Build.0 = Release|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Debug|Win32.ActiveCfg = Debug|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Debug|Win32.Build.0 = Debug|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Release|Win32.ActiveCfg = Release|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Release|Win32.Build.0 = Release|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Debug|Win32.ActiveCfg = Debug|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Debug|Win32.Build.0 = Debug|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Release|Win32.ActiveCfg = Release|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Release|Win32.Build.0 = Release|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Debug|Win32.ActiveCfg = Debug|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Debug|Win32.Build.0 = Debug|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Release|Win32.ActiveCfg = Release|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Release|Win32.Build.0 = Release|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Debug|Win32.ActiveCfg = Debug|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Debug|Win32.Build.0 = Debug|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Release|Win32.ActiveCfg = Release|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/libevent/autogen.sh b/libevent/autogen.sh
new file mode 100644
index 00000000000..6d4275a6392
--- /dev/null
+++ b/libevent/autogen.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Regenerate the autotools build system (configure, Makefile.in, ...).
+# Darwin ships GNU libtoolize as "glibtoolize", so select it there.
+LIBTOOLIZE=libtoolize
+SYSNAME=`uname`
+if [ "x$SYSNAME" = "xDarwin" ] ; then
+  LIBTOOLIZE=glibtoolize
+fi
+# Each tool runs only if the previous one succeeded (&& chain).
+aclocal && \
+	autoheader && \
+	$LIBTOOLIZE && \
+	autoconf && \
+	automake --add-missing --copy
diff --git a/libevent/buffer.c b/libevent/buffer.c
new file mode 100644
index 00000000000..9cb0f0ce323
--- /dev/null
+++ b/libevent/buffer.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2002, 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_VASPRINTF
+/* If we have vasprintf, we need to define this before we include stdio.h. */
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDARG_H
+#include <stdarg.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "event.h"
+#include "config.h"
+#include "evutil.h"
+
+/* Allocate a new, empty evbuffer (all fields zeroed by calloc).
+ * Returns NULL on allocation failure. */
+struct evbuffer *
+evbuffer_new(void)
+{
+	struct evbuffer *buffer;
+
+	buffer = calloc(1, sizeof(struct evbuffer));
+
+	return (buffer);
+}
+
+/* Release an evbuffer and its backing storage.  Unlike free(), the
+ * argument must not be NULL — buffer->orig_buffer is read unconditionally. */
+void
+evbuffer_free(struct evbuffer *buffer)
+{
+	if (buffer->orig_buffer != NULL)
+		free(buffer->orig_buffer);
+	free(buffer);
+}
+
+/*
+ * This is a destructive add. The data from one buffer moves into
+ * the other buffer.
+ */
+
+/* Copy (y)'s storage bookkeeping into (x); used below to exchange two
+ * evbuffers wholesale.  Note it deliberately does NOT copy cb/cbarg,
+ * so each buffer keeps its own callback. */
+#define SWAP(x,y) do { \
+	(x)->buffer = (y)->buffer; \
+	(x)->orig_buffer = (y)->orig_buffer; \
+	(x)->misalign = (y)->misalign; \
+	(x)->totallen = (y)->totallen; \
+	(x)->off = (y)->off; \
+} while (0)
+
+/* Move all data from inbuf to the end of outbuf (destructive append:
+ * inbuf is left empty).  Returns 0 on success, -1 on allocation failure. */
+int
+evbuffer_add_buffer(struct evbuffer *outbuf, struct evbuffer *inbuf)
+{
+	int res;
+
+	/* Short cut for better performance */
+	if (outbuf->off == 0) {
+		struct evbuffer tmp;
+		size_t oldoff = inbuf->off;
+
+		/* Swap them directly */
+		SWAP(&tmp, outbuf);
+		SWAP(outbuf, inbuf);
+		SWAP(inbuf, &tmp);
+
+		/*
+		 * Optimization comes with a price; we need to notify the
+		 * buffer if necessary of the changes. oldoff is the amount
+		 * of data that we transfered from inbuf to outbuf
+		 */
+		if (inbuf->off != oldoff && inbuf->cb != NULL)
+			(*inbuf->cb)(inbuf, oldoff, inbuf->off, inbuf->cbarg);
+		if (oldoff && outbuf->cb != NULL)
+			(*outbuf->cb)(outbuf, 0, oldoff, outbuf->cbarg);
+
+		return (0);
+	}
+
+	/* General case: copy the bytes, then drain the source. */
+	res = evbuffer_add(outbuf, inbuf->buffer, inbuf->off);
+	if (res == 0) {
+		/* We drain the input buffer on success */
+		evbuffer_drain(inbuf, inbuf->off);
+	}
+
+	return (res);
+}
+
+/* vprintf-style append to buf.  Returns the number of bytes written
+ * (excluding the NUL), or -1 on format/allocation error.  Grows the
+ * buffer and retries until the formatted output fits. */
+int
+evbuffer_add_vprintf(struct evbuffer *buf, const char *fmt, va_list ap)
+{
+	char *buffer;
+	size_t space;
+	size_t oldoff = buf->off;
+	int sz;
+	va_list aq;
+
+	/* make sure that at least some space is available */
+	evbuffer_expand(buf, 64);
+	for (;;) {
+		size_t used = buf->misalign + buf->off;
+		buffer = (char *)buf->buffer + buf->off;
+		assert(buf->totallen >= used);
+		space = buf->totallen - used;
+
+#ifndef va_copy
+#define va_copy(dst, src)	memcpy(&(dst), &(src), sizeof(va_list))
+#endif
+		/* Copy the arg list: ap must remain reusable for a retry. */
+		va_copy(aq, ap);
+
+		sz = evutil_vsnprintf(buffer, space, fmt, aq);
+
+		va_end(aq);
+
+		if (sz < 0)
+			return (-1);
+		if ((size_t)sz < space) {
+			buf->off += sz;
+			if (buf->cb != NULL)
+				(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+			return (sz);
+		}
+		/* Truncated: grow to hold sz bytes plus the NUL and retry. */
+		if (evbuffer_expand(buf, sz + 1) == -1)
+			return (-1);
+
+	}
+	/* NOTREACHED */
+}
+
+/* printf-style append to buf; thin wrapper over evbuffer_add_vprintf().
+ * Returns bytes written or -1 on error. */
+int
+evbuffer_add_printf(struct evbuffer *buf, const char *fmt, ...)
+{
+	int res = -1;
+	va_list ap;
+
+	va_start(ap, fmt);
+	res = evbuffer_add_vprintf(buf, fmt, ap);
+	va_end(ap);
+
+	return (res);
+}
+
+/* Reads data from an event buffer and drains the bytes read */
+
+/* Copy up to datlen bytes into data and drain them from buf.  Returns
+ * the number of bytes copied.  NOTE(review): the size_t count is
+ * returned as int, so a request over INT_MAX would be misreported —
+ * harmless for typical buffer sizes, but worth knowing. */
+int
+evbuffer_remove(struct evbuffer *buf, void *data, size_t datlen)
+{
+	size_t nread = datlen;
+	if (nread >= buf->off)
+		nread = buf->off;
+
+	memcpy(data, buf->buffer, nread);
+	evbuffer_drain(buf, nread);
+
+	return (nread);
+}
+
+/*
+ * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'.
+ * The returned buffer needs to be freed by the caller.
+ */
+
+char *
+evbuffer_readline(struct evbuffer *buffer)
+{
+	u_char *data = EVBUFFER_DATA(buffer);
+	size_t len = EVBUFFER_LENGTH(buffer);
+	char *line;
+	unsigned int i;
+
+	/* Find the first CR or LF; that terminates the line. */
+	for (i = 0; i < len; i++) {
+		if (data[i] == '\r' || data[i] == '\n')
+			break;
+	}
+
+	/* No complete line buffered yet. */
+	if (i == len)
+		return (NULL);
+
+	if ((line = malloc(i + 1)) == NULL) {
+		fprintf(stderr, "%s: out of memory\n", __func__);
+		evbuffer_drain(buffer, i);
+		return (NULL);
+	}
+
+	memcpy(line, data, i);
+	line[i] = '\0';
+
+	/*
+	 * Some protocols terminate a line with '\r\n', so check for
+	 * that, too.
+	 */
+	if ( i < len - 1 ) {
+		char fch = data[i], sch = data[i+1];
+
+		/* Drain one more character if needed */
+		if ( (sch == '\r' || sch == '\n') && sch != fch )
+			i += 1;
+	}
+
+	/* NOTE(review): if the terminator is the last byte currently
+	 * buffered (e.g. "...\r" with its '\n' still in flight), only the
+	 * '\r' is drained here and the late '\n' will later be read as an
+	 * empty line.  Known limitation of this 1.4-era API. */
+	evbuffer_drain(buffer, i + 1);
+
+	return (line);
+}
+
+/* Slide the buffered bytes back to the start of the allocation so the
+ * misalignment becomes 0, reclaiming the leading slack.  (The previous
+ * comment here, "Adds data to an event buffer", described evbuffer_add()
+ * below, not this helper.) */
+
+static void
+evbuffer_align(struct evbuffer *buf)
+{
+	memmove(buf->orig_buffer, buf->buffer, buf->off);
+	buf->buffer = buf->orig_buffer;
+	buf->misalign = 0;
+}
+
+/* Expands the available space in the event buffer to at least datlen */
+
+int
+evbuffer_expand(struct evbuffer *buf, size_t datlen)
+{
+	size_t need = buf->misalign + buf->off + datlen;
+
+	/* If we can fit all the data, then we don't have to do anything */
+	if (buf->totallen >= need)
+		return (0);
+
+	/*
+	 * If the misalignment fulfills our data needs, we just force an
+	 * alignment to happen.  Afterwards, we have enough space.
+	 */
+	if (buf->misalign >= datlen) {
+		evbuffer_align(buf);
+	} else {
+		void *newbuf;
+		size_t length = buf->totallen;
+
+		/* Grow geometrically from a 256-byte floor.
+		 * NOTE(review): "length <<= 1" can wrap to 0 for
+		 * need > SIZE_MAX/2 and spin forever — unreachable for
+		 * realistic sizes, but a guard would be cheap. */
+		if (length < 256)
+			length = 256;
+		while (length < need)
+			length <<= 1;
+
+		/* realloc only preserves a prefix, so re-pack first. */
+		if (buf->orig_buffer != buf->buffer)
+			evbuffer_align(buf);
+		if ((newbuf = realloc(buf->buffer, length)) == NULL)
+			return (-1);
+
+		buf->orig_buffer = buf->buffer = newbuf;
+		buf->totallen = length;
+	}
+
+	return (0);
+}
+
+/* Append datlen bytes from data to buf, growing the buffer if needed.
+ * Returns 0 on success, -1 on allocation failure. */
+int
+evbuffer_add(struct evbuffer *buf, const void *data, size_t datlen)
+{
+	size_t need = buf->misalign + buf->off + datlen;
+	size_t oldoff = buf->off;
+
+	if (buf->totallen < need) {
+		if (evbuffer_expand(buf, datlen) == -1)
+			return (-1);
+	}
+
+	memcpy(buf->buffer + buf->off, data, datlen);
+	buf->off += datlen;
+
+	/* Fire the change callback only if something was actually added. */
+	if (datlen && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+	return (0);
+}
+
+/* Discard the first len bytes of buf (everything, if len >= off).
+ * A full drain also resets the misalignment window. */
+void
+evbuffer_drain(struct evbuffer *buf, size_t len)
+{
+	size_t oldoff = buf->off;
+
+	if (len >= buf->off) {
+		buf->off = 0;
+		buf->buffer = buf->orig_buffer;
+		buf->misalign = 0;
+		goto done;
+	}
+
+	/* Partial drain: advance the window pointer — no copying. */
+	buf->buffer += len;
+	buf->misalign += len;
+
+	buf->off -= len;
+
+ done:
+	/* Tell someone about changes in this buffer */
+	if (buf->off != oldoff && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+}
+
+/*
+ * Reads data from a file descriptor into a buffer.
+ */
+
+#define EVBUFFER_MAX_READ	4096
+
+/* Read up to howmuch bytes from fd into buf; a negative howmuch means
+ * "use a FIONREAD-derived amount".  Returns bytes read, 0 on EOF, or
+ * -1 on error. */
+int
+evbuffer_read(struct evbuffer *buf, int fd, int howmuch)
+{
+	u_char *p;
+	size_t oldoff = buf->off;
+	int n = EVBUFFER_MAX_READ;
+
+#if defined(FIONREAD)
+#ifdef WIN32
+	long lng = n;
+	if (ioctlsocket(fd, FIONREAD, &lng) == -1 || (n=lng) == 0) {
+#else
+	if (ioctl(fd, FIONREAD, &n) == -1 || n == 0) {
+#endif
+		n = EVBUFFER_MAX_READ;
+	} else if (n > EVBUFFER_MAX_READ && n > howmuch) {
+		/*
+		 * It's possible that a lot of data is available for
+		 * reading.  We do not want to exhaust resources
+		 * before the reader has a chance to do something
+		 * about it.  If the reader does not tell us how much
+		 * data we should read, we artifically limit it.
+		 */
+		if ((size_t)n > buf->totallen << 2)
+			n = buf->totallen << 2;
+		if (n < EVBUFFER_MAX_READ)
+			n = EVBUFFER_MAX_READ;
+	}
+#endif
+	if (howmuch < 0 || howmuch > n)
+		howmuch = n;
+
+	/* If we don't have FIONREAD, we might waste some space here */
+	if (evbuffer_expand(buf, howmuch) == -1)
+		return (-1);
+
+	/* We can append new data at this point */
+	p = buf->buffer + buf->off;
+
+#ifndef WIN32
+	n = read(fd, p, howmuch);
+#else
+	/* Winsock descriptors are SOCKETs; read() does not work on them. */
+	n = recv(fd, p, howmuch, 0);
+#endif
+	if (n == -1)
+		return (-1);
+	if (n == 0)
+		return (0);
+
+	buf->off += n;
+
+	/* Tell someone about changes in this buffer */
+	if (buf->off != oldoff && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+	return (n);
+}
+
+/* Write (and drain) as much buffered data as fd accepts in one call.
+ * Returns bytes written, 0 if nothing was written, or -1 on error. */
+int
+evbuffer_write(struct evbuffer *buffer, int fd)
+{
+	int n;
+
+#ifndef WIN32
+	n = write(fd, buffer->buffer, buffer->off);
+#else
+	n = send(fd, buffer->buffer, buffer->off, 0);
+#endif
+	if (n == -1)
+		return (-1);
+	if (n == 0)
+		return (0);
+	/* Only the bytes actually written are removed from the buffer. */
+	evbuffer_drain(buffer, n);
+
+	return (n);
+}
+
+/* Locate the first occurrence of the len-byte needle "what" in the
+ * buffered data.  Returns a pointer into the live buffer (valid until
+ * the buffer is modified), or NULL if not found. */
+u_char *
+evbuffer_find(struct evbuffer *buffer, const u_char *what, size_t len)
+{
+	u_char *search = buffer->buffer, *end = search + buffer->off;
+	u_char *p;
+
+	/* memchr to each candidate first byte, then memcmp the rest. */
+	while (search < end &&
+	    (p = memchr(search, *what, end - search)) != NULL) {
+		if (p + len > end)
+			break;
+		if (memcmp(p, what, len) == 0)
+			return (p);
+		search = p + 1;
+	}
+
+	return (NULL);
+}
+
+/* Register (or clear, with cb == NULL) the change callback invoked
+ * whenever the amount of data in the buffer changes; cbarg is passed
+ * through to the callback verbatim. */
+void evbuffer_setcb(struct evbuffer *buffer,
+    void (*cb)(struct evbuffer *, size_t, size_t, void *),
+    void *cbarg)
+{
+	buffer->cb = cb;
+	buffer->cbarg = cbarg;
+}
diff --git a/libevent/cmake_install.cmake b/libevent/cmake_install.cmake
new file mode 100644
index 00000000000..341d9b9d7b9
--- /dev/null
+++ b/libevent/cmake_install.cmake
@@ -0,0 +1,34 @@
+# Install script for directory: /my/maria-10.0-merge/libevent
+
+# Set the install prefix
+IF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+ SET(CMAKE_INSTALL_PREFIX "/usr/local/mysql")
+ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+ IF(BUILD_TYPE)
+ STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+ CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+ ELSE(BUILD_TYPE)
+ SET(CMAKE_INSTALL_CONFIG_NAME "Debug")
+ ENDIF(BUILD_TYPE)
+ MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+
+# Set the component getting installed.
+IF(NOT CMAKE_INSTALL_COMPONENT)
+ IF(COMPONENT)
+ MESSAGE(STATUS "Install component: \"${COMPONENT}\"")
+ SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+ ELSE(COMPONENT)
+ SET(CMAKE_INSTALL_COMPONENT)
+ ENDIF(COMPONENT)
+ENDIF(NOT CMAKE_INSTALL_COMPONENT)
+
+# Install shared libraries without execute permission?
+IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+ SET(CMAKE_INSTALL_SO_NO_EXE "0")
+ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+
diff --git a/libevent/compat/sys/_time.h b/libevent/compat/sys/_time.h
new file mode 100644
index 00000000000..8cabb0d55e7
--- /dev/null
+++ b/libevent/compat/sys/_time.h
@@ -0,0 +1,163 @@
+/* $OpenBSD: time.h,v 1.11 2000/10/10 13:36:48 itojun Exp $ */
+/* $NetBSD: time.h,v 1.18 1996/04/23 10:29:33 mycroft Exp $ */
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.2 (Berkeley) 7/10/94
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+#include <sys/types.h>
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.1b to be like a timeval.
+ */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* and nanoseconds */
+};
+
+#define TIMEVAL_TO_TIMESPEC(tv, ts) { \
+ (ts)->tv_sec = (tv)->tv_sec; \
+ (ts)->tv_nsec = (tv)->tv_usec * 1000; \
+}
+#define TIMESPEC_TO_TIMEVAL(tv, ts) { \
+ (tv)->tv_sec = (ts)->tv_sec; \
+ (tv)->tv_usec = (ts)->tv_nsec / 1000; \
+}
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+#define DST_NONE 0 /* not on dst */
+#define DST_USA 1 /* USA style dst */
+#define DST_AUST 2 /* Australian style dst */
+#define DST_WET 3 /* Western European dst */
+#define DST_MET 4 /* Middle European dst */
+#define DST_EET 5 /* Eastern European dst */
+#define DST_CAN 6 /* Canada */
+
+/* Operations on timevals. */
+#define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0
+#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
+#define timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+#define timeradd(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \
+ if ((vvp)->tv_usec >= 1000000) { \
+ (vvp)->tv_sec++; \
+ (vvp)->tv_usec -= 1000000; \
+ } \
+ } while (0)
+#define timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+
+/* Operations on timespecs. */
+#define timespecclear(tsp) (tsp)->tv_sec = (tsp)->tv_nsec = 0
+#define timespecisset(tsp) ((tsp)->tv_sec || (tsp)->tv_nsec)
+#define timespeccmp(tsp, usp, cmp) \
+ (((tsp)->tv_sec == (usp)->tv_sec) ? \
+ ((tsp)->tv_nsec cmp (usp)->tv_nsec) : \
+ ((tsp)->tv_sec cmp (usp)->tv_sec))
+#define timespecadd(tsp, usp, vsp) \
+ do { \
+ (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec; \
+ (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec; \
+ if ((vsp)->tv_nsec >= 1000000000L) { \
+ (vsp)->tv_sec++; \
+ (vsp)->tv_nsec -= 1000000000L; \
+ } \
+ } while (0)
+#define timespecsub(tsp, usp, vsp) \
+ do { \
+ (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \
+ (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \
+ if ((vsp)->tv_nsec < 0) { \
+ (vsp)->tv_sec--; \
+ (vsp)->tv_nsec += 1000000000L; \
+ } \
+ } while (0)
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+ int hz; /* clock frequency */
+ int tick; /* micro-seconds per hz tick */
+ int tickadj; /* clock skew rate for adjtime() */
+ int stathz; /* statistics clock frequency */
+ int profhz; /* profiling clock frequency */
+};
+
+#define CLOCK_REALTIME 0
+#define CLOCK_VIRTUAL 1
+#define CLOCK_PROF 2
+
+#define TIMER_RELTIME 0x0 /* relative timer */
+#define TIMER_ABSTIME 0x1 /* absolute timer */
+
+/* --- stuff got cut here - niels --- */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/libevent/compat/sys/queue.h b/libevent/compat/sys/queue.h
new file mode 100644
index 00000000000..c0956ddce43
--- /dev/null
+++ b/libevent/compat/sys/queue.h
@@ -0,0 +1,488 @@
+/* $OpenBSD: queue.h,v 1.16 2000/09/07 19:47:59 art Exp $ */
+/* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+/*
+ * This file defines five types of data structures: singly-linked lists,
+ * lists, simple queues, tail queues, and circular queues.
+ *
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A simple queue is headed by a pair of pointers, one the head of the
+ * list and the other to the tail of the list. The elements are singly
+ * linked to save space, so elements can only be removed from the
+ * head of the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A simple queue may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * Singly-linked List definitions.
+ */
+#define SLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define SLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#ifndef WIN32
+#define SLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+#endif
+
+/*
+ * Singly-linked List access methods.
+ */
+#define SLIST_FIRST(head) ((head)->slh_first)
+#define SLIST_END(head) NULL
+#define SLIST_EMPTY(head) (SLIST_FIRST(head) == SLIST_END(head))
+#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+#define SLIST_FOREACH(var, head, field) \
+ for((var) = SLIST_FIRST(head); \
+ (var) != SLIST_END(head); \
+ (var) = SLIST_NEXT(var, field))
+
+/*
+ * Singly-linked List functions.
+ */
+#define SLIST_INIT(head) { \
+ SLIST_FIRST(head) = SLIST_END(head); \
+}
+
+#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ (elm)->field.sle_next = (slistelm)->field.sle_next; \
+ (slistelm)->field.sle_next = (elm); \
+} while (0)
+
+#define SLIST_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.sle_next = (head)->slh_first; \
+ (head)->slh_first = (elm); \
+} while (0)
+
+#define SLIST_REMOVE_HEAD(head, field) do { \
+ (head)->slh_first = (head)->slh_first->field.sle_next; \
+} while (0)
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List access methods
+ */
+#define LIST_FIRST(head) ((head)->lh_first)
+#define LIST_END(head) NULL
+#define LIST_EMPTY(head) (LIST_FIRST(head) == LIST_END(head))
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_FOREACH(var, head, field) \
+ for((var) = LIST_FIRST(head); \
+ (var)!= LIST_END(head); \
+ (var) = LIST_NEXT(var, field))
+
+/*
+ * List functions.
+ */
+#define LIST_INIT(head) do { \
+ LIST_FIRST(head) = LIST_END(head); \
+} while (0)
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (0)
+
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (0)
+
+#define LIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (0)
+
+#define LIST_REPLACE(elm, elm2, field) do { \
+ if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
+ (elm2)->field.le_next->field.le_prev = \
+ &(elm2)->field.le_next; \
+ (elm2)->field.le_prev = (elm)->field.le_prev; \
+ *(elm2)->field.le_prev = (elm2); \
+} while (0)
+
+/*
+ * Simple queue definitions.
+ */
+#define SIMPLEQ_HEAD(name, type) \
+struct name { \
+ struct type *sqh_first; /* first element */ \
+ struct type **sqh_last; /* addr of last next element */ \
+}
+
+#define SIMPLEQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).sqh_first }
+
+#define SIMPLEQ_ENTRY(type) \
+struct { \
+ struct type *sqe_next; /* next element */ \
+}
+
+/*
+ * Simple queue access methods.
+ */
+#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
+#define SIMPLEQ_END(head) NULL
+#define SIMPLEQ_EMPTY(head) (SIMPLEQ_FIRST(head) == SIMPLEQ_END(head))
+#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
+
+#define SIMPLEQ_FOREACH(var, head, field) \
+ for((var) = SIMPLEQ_FIRST(head); \
+ (var) != SIMPLEQ_END(head); \
+ (var) = SIMPLEQ_NEXT(var, field))
+
+/*
+ * Simple queue functions.
+ */
+/* Reset head to the empty state (sqh_last points at sqh_first). */
+#define SIMPLEQ_INIT(head) do { \
+ (head)->sqh_first = NULL; \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (0)
+
+/* Insert at the front; fix up sqh_last if the queue was empty. */
+#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (head)->sqh_first = (elm); \
+} while (0)
+
+/* Append at the tail via the cached last-next pointer: O(1). */
+#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.sqe_next = NULL; \
+ *(head)->sqh_last = (elm); \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+} while (0)
+
+/* Insert elm directly after listelm; update sqh_last when listelm
+ * was the tail. */
+#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (listelm)->field.sqe_next = (elm); \
+} while (0)
+
+/* Remove the first element (caller passes it as elm); restore the
+ * empty-queue invariant when the queue drains. */
+#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \
+ if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (0)
+
+/*
+ * Tail queue definitions.
+ */
+/* Head of a tail queue: first element plus the address of the last
+ * "next" pointer, giving O(1) insertion at both ends. */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+}
+
+/* Static initializer for an empty tail queue head. */
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+/* Per-element linkage: next pointer plus the address of the previous
+ * element's next pointer (enables O(1) removal without a back link). */
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+}
+
+/*
+ * tail queue access methods
+ */
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_END(head) NULL
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+/* TAILQ_LAST/TAILQ_PREV reinterpret a tqe_prev/tqh_last pointer as a
+ * pointer to a head struct; this relies on the entry and head structs
+ * sharing the same two-pointer layout. */
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+/* XXX */
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+#define TAILQ_EMPTY(head) \
+ (TAILQ_FIRST(head) == TAILQ_END(head))
+
+/* Forward iteration; do not remove the current element in the body. */
+#define TAILQ_FOREACH(var, head, field) \
+ for((var) = TAILQ_FIRST(head); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_NEXT(var, field))
+
+/* Reverse iteration using the headname cast trick above. */
+#define TAILQ_FOREACH_REVERSE(var, head, field, headname) \
+ for((var) = TAILQ_LAST(head, headname); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_PREV(var, headname, field))
+
+/*
+ * Tail queue functions.
+ */
+/* Reset head to the empty state (tqh_last points at tqh_first). */
+#define TAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (0)
+
+/* Insert at the front; the old first element's tqe_prev (or tqh_last
+ * for an empty queue) must be repointed at elm's next field. */
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (0)
+
+/* Append at the tail via the cached last-next pointer: O(1). */
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (0)
+
+/* Insert elm directly after listelm. */
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (0)
+
+/* Insert elm directly before listelm; no head fixup is needed since
+ * listelm (still in the queue) remains a possible tail. */
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (0)
+
+/* Unlink elm in O(1); elm's own link fields are left dangling. */
+#define TAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (0)
+
+/* Substitute elm2 for elm in place; elm's links are left untouched. */
+#define TAILQ_REPLACE(head, elm, elm2, field) do { \
+ if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != NULL) \
+ (elm2)->field.tqe_next->field.tqe_prev = \
+ &(elm2)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm2)->field.tqe_next; \
+ (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
+ *(elm2)->field.tqe_prev = (elm2); \
+} while (0)
+
+/*
+ * Circular queue definitions.
+ */
+/* Head of a circular queue: direct pointers to first and last
+ * elements; the head itself (cast to void *) is the end sentinel. */
+#define CIRCLEQ_HEAD(name, type) \
+struct name { \
+ struct type *cqh_first; /* first element */ \
+ struct type *cqh_last; /* last element */ \
+}
+
+/* Static initializer: both ends point at the head sentinel. */
+#define CIRCLEQ_HEAD_INITIALIZER(head) \
+ { CIRCLEQ_END(&head), CIRCLEQ_END(&head) }
+
+/* Per-element linkage: bidirectional, so removal needs no head scan. */
+#define CIRCLEQ_ENTRY(type) \
+struct { \
+ struct type *cqe_next; /* next element */ \
+ struct type *cqe_prev; /* previous element */ \
+}
+
+/*
+ * Circular queue access methods
+ */
+/* Note: unlike the other queues, the end sentinel is the head pointer
+ * itself, not NULL. */
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+#define CIRCLEQ_END(head) ((void *)(head))
+#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+#define CIRCLEQ_EMPTY(head) \
+ (CIRCLEQ_FIRST(head) == CIRCLEQ_END(head))
+
+/* Forward iteration; do not remove the current element in the body. */
+#define CIRCLEQ_FOREACH(var, head, field) \
+ for((var) = CIRCLEQ_FIRST(head); \
+ (var) != CIRCLEQ_END(head); \
+ (var) = CIRCLEQ_NEXT(var, field))
+
+/* Reverse iteration, same caveat as above. */
+#define CIRCLEQ_FOREACH_REVERSE(var, head, field) \
+ for((var) = CIRCLEQ_LAST(head); \
+ (var) != CIRCLEQ_END(head); \
+ (var) = CIRCLEQ_PREV(var, field))
+
+/*
+ * Circular queue functions.
+ */
+/* Reset head to the empty state (both ends point at the sentinel). */
+#define CIRCLEQ_INIT(head) do { \
+ (head)->cqh_first = CIRCLEQ_END(head); \
+ (head)->cqh_last = CIRCLEQ_END(head); \
+} while (0)
+
+/* Insert elm directly after listelm; fix cqh_last when listelm was
+ * the tail. */
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm)->field.cqe_next; \
+ (elm)->field.cqe_prev = (listelm); \
+ if ((listelm)->field.cqe_next == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (listelm)->field.cqe_next->field.cqe_prev = (elm); \
+ (listelm)->field.cqe_next = (elm); \
+} while (0)
+
+/* Insert elm directly before listelm; fix cqh_first when listelm was
+ * the head element. */
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm); \
+ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+ if ((listelm)->field.cqe_prev == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (listelm)->field.cqe_prev->field.cqe_next = (elm); \
+ (listelm)->field.cqe_prev = (elm); \
+} while (0)
+
+/* Insert at the front; also becomes the tail if the queue was empty. */
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.cqe_next = (head)->cqh_first; \
+ (elm)->field.cqe_prev = CIRCLEQ_END(head); \
+ if ((head)->cqh_last == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (head)->cqh_first->field.cqe_prev = (elm); \
+ (head)->cqh_first = (elm); \
+} while (0)
+
+/* Insert at the tail; also becomes the first if the queue was empty. */
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.cqe_next = CIRCLEQ_END(head); \
+ (elm)->field.cqe_prev = (head)->cqh_last; \
+ if ((head)->cqh_first == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (head)->cqh_last->field.cqe_next = (elm); \
+ (head)->cqh_last = (elm); \
+} while (0)
+
+/* Unlink elm in O(1), patching head/tail or neighbors as needed;
+ * elm's own link fields are left dangling. */
+#define CIRCLEQ_REMOVE(head, elm, field) do { \
+ if ((elm)->field.cqe_next == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm)->field.cqe_prev; \
+ else \
+ (elm)->field.cqe_next->field.cqe_prev = \
+ (elm)->field.cqe_prev; \
+ if ((elm)->field.cqe_prev == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm)->field.cqe_next; \
+ else \
+ (elm)->field.cqe_prev->field.cqe_next = \
+ (elm)->field.cqe_next; \
+} while (0)
+
+/*
+ * Substitute elm2 for elm in place; elm's links are left untouched.
+ * Fix: every other CIRCLEQ macro in this header takes head as a
+ * pointer and uses (head)->; the original used (head).cqh_last /
+ * (head).cqh_first here, which fails to compile with a pointer
+ * argument (same defect later corrected in upstream BSD queue.h).
+ */
+#define CIRCLEQ_REPLACE(head, elm, elm2, field) do { \
+ if (((elm2)->field.cqe_next = (elm)->field.cqe_next) == \
+ CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm2); \
+ else \
+ (elm2)->field.cqe_next->field.cqe_prev = (elm2); \
+ if (((elm2)->field.cqe_prev = (elm)->field.cqe_prev) == \
+ CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm2); \
+ else \
+ (elm2)->field.cqe_prev->field.cqe_next = (elm2); \
+} while (0)
+
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/libevent/configure.in b/libevent/configure.in
new file mode 100644
index 00000000000..bc3eca1f043
--- /dev/null
+++ b/libevent/configure.in
@@ -0,0 +1,387 @@
+dnl configure.in for libevent
+dnl Dug Song <dugsong@monkey.org>
+AC_INIT(event.c)
+
+AM_INIT_AUTOMAKE(libevent,1.4.12-stable)
+AM_CONFIG_HEADER(config.h)
+dnl AM_MAINTAINER_MODE
+
+dnl Initialize prefix.
+if test "$prefix" = "NONE"; then
+ prefix="/usr/local"
+fi
+
+dnl Checks for programs.
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_LN_S
+
+AC_PROG_GCC_TRADITIONAL
+if test "$GCC" = yes ; then
+ CFLAGS="$CFLAGS -Wall"
+ # And disable the strict-aliasing optimization, since it breaks
+ # our sockaddr-handling code in strange ways.
+ CFLAGS="$CFLAGS -fno-strict-aliasing"
+fi
+
+AC_ARG_ENABLE(gcc-warnings,
+ AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings with GCC))
+
+AC_PROG_LIBTOOL
+
+dnl Uncomment "AC_DISABLE_SHARED" to make shared libraries not get
+dnl built by default. You can also turn shared libs on and off from
+dnl the command line with --enable-shared and --disable-shared.
+dnl AC_DISABLE_SHARED
+AC_SUBST(LIBTOOL_DEPS)
+
+dnl Checks for libraries.
+AC_CHECK_LIB(socket, socket)
+AC_CHECK_LIB(resolv, inet_aton)
+AC_CHECK_LIB(rt, clock_gettime)
+AC_CHECK_LIB(nsl, inet_ntoa)
+
+dnl Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS(fcntl.h stdarg.h inttypes.h stdint.h poll.h signal.h unistd.h sys/epoll.h sys/time.h sys/queue.h sys/event.h sys/param.h sys/ioctl.h sys/select.h sys/devpoll.h port.h netinet/in6.h sys/socket.h)
+if test "x$ac_cv_header_sys_queue_h" = "xyes"; then
+ AC_MSG_CHECKING(for TAILQ_FOREACH in sys/queue.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/queue.h>
+#ifdef TAILQ_FOREACH
+ yes
+#endif
+], [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TAILQFOREACH, 1,
+ [Define if TAILQ_FOREACH is defined in <sys/queue.h>])],
+ AC_MSG_RESULT(no)
+ )
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timeradd in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timeradd
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERADD, 1,
+ [Define if timeradd is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timercmp in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timercmp
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERCMP, 1,
+ [Define if timercmp is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timerclear in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timerclear
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERCLEAR, 1,
+ [Define if timerclear is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timerisset in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timerisset
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERISSET, 1,
+ [Define if timerisset is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+dnl - check if the macro WIN32 is defined on this compiler.
+dnl - (this is how we check for a windows version of GCC)
+AC_MSG_CHECKING(for WIN32)
+AC_TRY_COMPILE(,
+ [
+#ifndef WIN32
+die horribly
+#endif
+ ],
+ bwin32=true; AC_MSG_RESULT(yes),
+ bwin32=false; AC_MSG_RESULT(no),
+)
+
+AM_CONDITIONAL(BUILD_WIN32, test x$bwin32 = xtrue)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+AC_HEADER_TIME
+
+dnl Checks for library functions.
+AC_CHECK_FUNCS(gettimeofday vasprintf fcntl clock_gettime strtok_r strsep getaddrinfo getnameinfo strlcpy inet_ntop signal sigaction strtoll)
+
+AC_CHECK_SIZEOF(long)
+
+if test "x$ac_cv_func_clock_gettime" = "xyes"; then
+ AC_DEFINE(DNS_USE_CPU_CLOCK_FOR_ID, 1, [Define if clock_gettime is available in libc])
+else
+ AC_DEFINE(DNS_USE_GETTIMEOFDAY_FOR_ID, 1, [Define if no secure id variant is available])
+fi
+
+AC_MSG_CHECKING(for F_SETFD in fcntl.h)
+AC_EGREP_CPP(yes,
+[
+#define _GNU_SOURCE
+#include <fcntl.h>
+#ifdef F_SETFD
+yes
+#endif
+], [ AC_DEFINE(HAVE_SETFD, 1,
+ [Define if F_SETFD is defined in <fcntl.h>])
+ AC_MSG_RESULT(yes) ], AC_MSG_RESULT(no))
+
+needsignal=no
+haveselect=no
+AC_CHECK_FUNCS(select, [haveselect=yes], )
+if test "x$haveselect" = "xyes" ; then
+ AC_LIBOBJ(select)
+ needsignal=yes
+fi
+
+havepoll=no
+AC_CHECK_FUNCS(poll, [havepoll=yes], )
+if test "x$havepoll" = "xyes" ; then
+ AC_LIBOBJ(poll)
+ needsignal=yes
+fi
+
+haveepoll=no
+AC_CHECK_FUNCS(epoll_ctl, [haveepoll=yes], )
+if test "x$haveepoll" = "xyes" ; then
+ AC_DEFINE(HAVE_EPOLL, 1,
+ [Define if your system supports the epoll system calls])
+ AC_LIBOBJ(epoll)
+ needsignal=yes
+fi
+
+havedevpoll=no
+if test "x$ac_cv_header_sys_devpoll_h" = "xyes"; then
+ AC_DEFINE(HAVE_DEVPOLL, 1,
+ [Define if /dev/poll is available])
+ AC_LIBOBJ(devpoll)
+fi
+
+havekqueue=no
+if test "x$ac_cv_header_sys_event_h" = "xyes"; then
+ AC_CHECK_FUNCS(kqueue, [havekqueue=yes], )
+ if test "x$havekqueue" = "xyes" ; then
+ AC_MSG_CHECKING(for working kqueue)
+ AC_TRY_RUN(
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/event.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int kq;
+ int n;
+ int fd[[2]];
+ struct kevent ev;
+ struct timespec ts;
+ char buf[[8000]];
+
+ if (pipe(fd) == -1)
+ exit(1);
+ if (fcntl(fd[[1]], F_SETFL, O_NONBLOCK) == -1)
+ exit(1);
+
+ while ((n = write(fd[[1]], buf, sizeof(buf))) == sizeof(buf))
+ ;
+
+ if ((kq = kqueue()) == -1)
+ exit(1);
+
+ ev.ident = fd[[1]];
+ ev.filter = EVFILT_WRITE;
+ ev.flags = EV_ADD | EV_ENABLE;
+ n = kevent(kq, &ev, 1, NULL, 0, NULL);
+ if (n == -1)
+ exit(1);
+
+ read(fd[[0]], buf, sizeof(buf));
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+ n = kevent(kq, NULL, 0, &ev, 1, &ts);
+ if (n == -1 || n == 0)
+ exit(1);
+
+ exit(0);
+}, [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_WORKING_KQUEUE, 1,
+ [Define if kqueue works correctly with pipes])
+ AC_LIBOBJ(kqueue)], AC_MSG_RESULT(no), AC_MSG_RESULT(no))
+ fi
+fi
+
+haveepollsyscall=no
+if test "x$ac_cv_header_sys_epoll_h" = "xyes"; then
+ if test "x$haveepoll" = "xno" ; then
+ AC_MSG_CHECKING(for epoll system call)
+ AC_TRY_RUN(
+#include <stdint.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+int
+epoll_create(int size)
+{
+ return (syscall(__NR_epoll_create, size));
+}
+
+int
+main(int argc, char **argv)
+{
+ int epfd;
+
+ epfd = epoll_create(256);
+ exit (epfd == -1 ? 1 : 0);
+}, [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_EPOLL, 1,
+ [Define if your system supports the epoll system calls])
+ needsignal=yes
+ AC_LIBOBJ(epoll_sub)
+ AC_LIBOBJ(epoll)], AC_MSG_RESULT(no), AC_MSG_RESULT(no))
+ fi
+fi
+
+haveeventports=no
+AC_CHECK_FUNCS(port_create, [haveeventports=yes], )
+if test "x$haveeventports" = "xyes" ; then
+ AC_DEFINE(HAVE_EVENT_PORTS, 1,
+ [Define if your system supports event ports])
+ AC_LIBOBJ(evport)
+ needsignal=yes
+fi
+dnl On win32 the generic signal handling code is always required.
+dnl (The original repeated this test twice verbatim; the duplicate
+dnl block has been removed.)
+if test "x$bwin32" = "xtrue"; then
+	needsignal=yes
+fi
+if test "x$needsignal" = "xyes" ; then
+	AC_LIBOBJ(signal)
+fi
+
+AC_TYPE_PID_T
+AC_TYPE_SIZE_T
+AC_CHECK_TYPES([uint64_t, uint32_t, uint16_t, uint8_t], , ,
+[#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#elif defined(HAVE_INTTYPES_H)
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif])
+AC_CHECK_SIZEOF(long long)
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(short)
+AC_CHECK_TYPES([struct in6_addr], , ,
+[#ifdef WIN32
+#include <winsock2.h>
+#else
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif])
+
+AC_MSG_CHECKING([for socklen_t])
+AC_TRY_COMPILE([
+ #include <sys/types.h>
+ #include <sys/socket.h>],
+ [socklen_t x;],
+ AC_MSG_RESULT([yes]),
+ [AC_MSG_RESULT([no])
+ AC_DEFINE(socklen_t, unsigned int,
+ [Define to unsigned int if you dont have it])]
+)
+
+AC_MSG_CHECKING([whether our compiler supports __func__])
+AC_TRY_COMPILE([],
+ [ const char *cp = __func__; ],
+ AC_MSG_RESULT([yes]),
+ AC_MSG_RESULT([no])
+ AC_MSG_CHECKING([whether our compiler supports __FUNCTION__])
+ AC_TRY_COMPILE([],
+ [ const char *cp = __FUNCTION__; ],
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(__func__, __FUNCTION__,
+ [Define to appropriate substitute if compiler doesnt have __func__]),
+ AC_MSG_RESULT([no])
+ AC_DEFINE(__func__, __FILE__,
+ [Define to appropriate substitute if compiler doesnt have __func__])))
+
+
+# Add some more warnings which we use in development but not in the
+# released versions. (Some relevant gcc versions can't handle these.)
+if test x$enable_gcc_warnings = xyes; then
+
+ AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+#if !defined(__GNUC__) || (__GNUC__ < 4)
+#error
+#endif]), have_gcc4=yes, have_gcc4=no)
+
+ AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
+#error
+#endif]), have_gcc42=yes, have_gcc42=no)
+
+ CFLAGS="$CFLAGS -W -Wfloat-equal -Wundef -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings -Wredundant-decls -Wchar-subscripts -Wcomment -Wformat=2 -Wwrite-strings -Wmissing-declarations -Wredundant-decls -Wnested-externs -Wbad-function-cast -Wswitch-enum -Werror"
+ CFLAGS="$CFLAGS -Wno-unused-parameter -Wno-sign-compare -Wstrict-aliasing"
+
+ if test x$have_gcc4 = xyes ; then
+ # These warnings break gcc 3.3.5 and work on gcc 4.0.2
+ CFLAGS="$CFLAGS -Winit-self -Wmissing-field-initializers -Wdeclaration-after-statement"
+ #CFLAGS="$CFLAGS -Wold-style-definition"
+ fi
+
+ if test x$have_gcc42 = xyes ; then
+ # These warnings break gcc 4.0.2 and work on gcc 4.2
+ CFLAGS="$CFLAGS -Waddress -Wnormalized=id -Woverride-init"
+ fi
+
+##This will break the world on some 64-bit architectures
+# CFLAGS="$CFLAGS -Winline"
+
+fi
+
+AC_OUTPUT(Makefile test/Makefile sample/Makefile)
diff --git a/libevent/devpoll.c b/libevent/devpoll.c
new file mode 100644
index 00000000000..cbd27309079
--- /dev/null
+++ b/libevent/devpoll.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/resource.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/devpoll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+/* Due to limitations in the /dev/poll interface, we need to keep track
+ * of all monitored file descriptors ourselves.
+ */
+/* Per-fd bookkeeping: the read and write events registered for one
+ * file descriptor (at most one of each). */
+struct evdevpoll {
+ struct event *evread;
+ struct event *evwrite;
+};
+
+/* Backend state for the /dev/poll event mechanism. */
+struct devpollop {
+ struct evdevpoll *fds; /* per-fd event table, indexed by fd */
+ int nfds; /* capacity of fds[] */
+ struct pollfd *events; /* results buffer for DP_POLL */
+ int nevents; /* capacity of events[] and changes[] */
+ int dpfd; /* fd of the open /dev/poll device */
+ struct pollfd *changes; /* pending changes, flushed via pwrite */
+ int nchanges; /* number of queued entries in changes[] */
+};
+
+/* Backend entry points, exported to event.c via the devpollops table. */
+static void *devpoll_init (struct event_base *);
+static int devpoll_add (void *, struct event *);
+static int devpoll_del (void *, struct event *);
+static int devpoll_dispatch (struct event_base *, void *, struct timeval *);
+static void devpoll_dealloc (struct event_base *, void *);
+
+const struct eventop devpollops = {
+ "devpoll",
+ devpoll_init,
+ devpoll_add,
+ devpoll_del,
+ devpoll_dispatch,
+ devpoll_dealloc,
+ 1 /* need reinit */
+};
+
+/* Default sizing for the event/change buffers when RLIMIT_NOFILE is
+ * unlimited. */
+#define NEVENT 32000
+
+/*
+ * Flush all queued pollfd change records to the /dev/poll device.
+ * Returns 0 on success, -1 if the write failed (the queued changes
+ * are kept in that case).
+ */
+static int
+devpoll_commit(struct devpollop *devpollop)
+{
+	size_t nbytes = sizeof(struct pollfd) * devpollop->nchanges;
+
+	/*
+	 * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
+	 * Write is limited to 2GB of data, until it will fail.
+	 */
+	if (pwrite(devpollop->dpfd, devpollop->changes, nbytes, 0) == -1)
+		return (-1);
+
+	devpollop->nchanges = 0;
+	return (0);
+}
+
+/*
+ * Append one change record for `fd` to the pending change buffer,
+ * committing the buffer to /dev/poll first when it is full.
+ * Returns 0 on success, -1 if a required commit failed.
+ */
+static int
+devpoll_queue(struct devpollop *devpollop, int fd, int events) {
+	struct pollfd *slot;
+
+	/* Change buffer full: push the queued entries to the kernel
+	 * before adding more. */
+	if (devpollop->nchanges >= devpollop->nevents &&
+	    devpoll_commit(devpollop) != 0)
+		return (-1);
+
+	slot = devpollop->changes + devpollop->nchanges;
+	devpollop->nchanges++;
+	slot->fd = fd;
+	slot->events = events;
+	slot->revents = 0;
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the /dev/poll backend state.
+ * Returns the opaque backend pointer, or NULL on any failure (env
+ * override, open failure, or allocation failure); all partially
+ * acquired resources are released before returning NULL.
+ */
+static void *
+devpoll_init(struct event_base *base)
+{
+ int dpfd, nfiles = NEVENT;
+ struct rlimit rl;
+ struct devpollop *devpollop;
+
+ /* Disable devpoll when this environment variable is set */
+ if (getenv("EVENT_NODEVPOLL"))
+ return (NULL);
+
+ if (!(devpollop = calloc(1, sizeof(struct devpollop))))
+ return (NULL);
+
+ /* Size the buffers to the fd limit when one is in effect. */
+ if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
+ rl.rlim_cur != RLIM_INFINITY)
+ nfiles = rl.rlim_cur;
+
+ /* Initialize the kernel queue */
+ if ((dpfd = open("/dev/poll", O_RDWR)) == -1) {
+ event_warn("open: /dev/poll");
+ free(devpollop);
+ return (NULL);
+ }
+
+ devpollop->dpfd = dpfd;
+
+ /* Initialize fields */
+ devpollop->events = calloc(nfiles, sizeof(struct pollfd));
+ if (devpollop->events == NULL) {
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+ devpollop->nevents = nfiles;
+
+ devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll));
+ if (devpollop->fds == NULL) {
+ free(devpollop->events);
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+ devpollop->nfds = nfiles;
+
+ devpollop->changes = calloc(nfiles, sizeof(struct pollfd));
+ if (devpollop->changes == NULL) {
+ free(devpollop->fds);
+ free(devpollop->events);
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+
+ /* Hook up signal-event support for this base. */
+ evsignal_init(base);
+
+ return (devpollop);
+}
+
+/*
+ * Grow the per-fd table so it can index fd `max`.  The capacity is
+ * doubled until it exceeds max; new slots are zeroed.  Returns 0 on
+ * success, -1 on allocation failure (existing table is preserved).
+ */
+static int
+devpoll_recalc(struct event_base *base, void *arg, int max)
+{
+ struct devpollop *devpollop = arg;
+
+ if (max >= devpollop->nfds) {
+ struct evdevpoll *fds;
+ int nfds;
+
+ nfds = devpollop->nfds;
+ while (nfds <= max)
+ nfds <<= 1;
+
+ /* realloc into a temporary so the old table survives failure */
+ fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll));
+ if (fds == NULL) {
+ event_warn("realloc");
+ return (-1);
+ }
+ devpollop->fds = fds;
+ memset(fds + devpollop->nfds, 0,
+ (nfds - devpollop->nfds) * sizeof(struct evdevpoll));
+ devpollop->nfds = nfds;
+ }
+
+ return (0);
+}
+
+/*
+ * Wait for events via the DP_POLL ioctl and activate the matching
+ * libevent events.  Returns 0 on success (including EINTR), -1 on a
+ * hard ioctl failure.
+ */
+static int
+devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct devpollop *devpollop = arg;
+ struct pollfd *events = devpollop->events;
+ struct dvpoll dvp;
+ struct evdevpoll *evdp;
+ int i, res, timeout = -1;
+
+ /* Flush any interest changes queued since the last dispatch. */
+ if (devpollop->nchanges)
+ devpoll_commit(devpollop);
+
+ /* Convert the timeout to milliseconds, rounding up; -1 blocks. */
+ if (tv != NULL)
+ timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
+
+ dvp.dp_fds = devpollop->events;
+ dvp.dp_nfds = devpollop->nevents;
+ dvp.dp_timeout = timeout;
+
+ res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
+
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("ioctl: DP_POLL");
+ return (-1);
+ }
+
+ /* EINTR: a signal may have arrived; process it and retry later */
+ evsignal_process(base);
+ return (0);
+ } else if (base->sig.evsignal_caught) {
+ evsignal_process(base);
+ }
+
+ event_debug(("%s: devpoll_wait reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int which = 0;
+ int what = events[i].revents;
+ struct event *evread = NULL, *evwrite = NULL;
+
+ assert(events[i].fd < devpollop->nfds);
+ evdp = &devpollop->fds[events[i].fd];
+
+ /* Hangup/error conditions wake both readers and writers. */
+ if (what & POLLHUP)
+ what |= POLLIN | POLLOUT;
+ else if (what & POLLERR)
+ what |= POLLIN | POLLOUT;
+
+ if (what & POLLIN) {
+ evread = evdp->evread;
+ which |= EV_READ;
+ }
+
+ if (what & POLLOUT) {
+ evwrite = evdp->evwrite;
+ which |= EV_WRITE;
+ }
+
+ if (!which)
+ continue;
+
+ /* Non-persistent events are deleted before being activated;
+ * guard against deleting the same event twice when it is
+ * registered for both read and write. */
+ if (evread != NULL && !(evread->ev_events & EV_PERSIST))
+ event_del(evread);
+ if (evwrite != NULL && evwrite != evread &&
+ !(evwrite->ev_events & EV_PERSIST))
+ event_del(evwrite);
+
+ if (evread != NULL)
+ event_active(evread, EV_READ, 1);
+ if (evwrite != NULL)
+ event_active(evwrite, EV_WRITE, 1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register interest in an event.  Signal events are delegated to the
+ * evsignal layer.  Returns 0 on success, -1 on table-growth failure,
+ * a conflicting registration, or a failed change-queue append.
+ */
+static int
+devpoll_add(void *arg, struct event *ev)
+{
+ struct devpollop *devpollop = arg;
+ struct evdevpoll *evdp;
+ int fd, events;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_add(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= devpollop->nfds) {
+ /* Extend the file descriptor array as necessary */
+ if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1)
+ return (-1);
+ }
+ evdp = &devpollop->fds[fd];
+
+ /*
+ * It's not necessary to OR the existing read/write events that we
+ * are currently interested in with the new event we are adding.
+ * The /dev/poll driver ORs any new events with the existing events
+ * that it has cached for the fd.
+ */
+
+ events = 0;
+ if (ev->ev_events & EV_READ) {
+ if (evdp->evread && evdp->evread != ev) {
+ /* There is already a different read event registered */
+ return(-1);
+ }
+ events |= POLLIN;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ if (evdp->evwrite && evdp->evwrite != ev) {
+ /* There is already a different write event registered */
+ return(-1);
+ }
+ events |= POLLOUT;
+ }
+
+ if (devpoll_queue(devpollop, fd, events) != 0)
+ return(-1);
+
+ /* Update events responsible */
+ if (ev->ev_events & EV_READ)
+ evdp->evread = ev;
+ if (ev->ev_events & EV_WRITE)
+ evdp->evwrite = ev;
+
+ return (0);
+}
+
+/*
+ * Remove interest in an event.  Signal events are delegated to the
+ * evsignal layer.  Because /dev/poll can only drop ALL events for an
+ * fd (POLLREMOVE), any remaining interest must be re-queued after the
+ * removal.  Returns 0 on success, -1 if queueing the removal failed.
+ */
+static int
+devpoll_del(void *arg, struct event *ev)
+{
+ struct devpollop *devpollop = arg;
+ struct evdevpoll *evdp;
+ int fd, events;
+ int needwritedelete = 1, needreaddelete = 1;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_del(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= devpollop->nfds)
+ return (0);
+ evdp = &devpollop->fds[fd];
+
+ events = 0;
+ if (ev->ev_events & EV_READ)
+ events |= POLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= POLLOUT;
+
+ /*
+ * The only way to remove an fd from the /dev/poll monitored set is
+ * to use POLLREMOVE by itself. This removes ALL events for the fd
+ * provided so if we care about two events and are only removing one
+ * we must re-add the other event after POLLREMOVE.
+ */
+
+ if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
+ return(-1);
+
+ if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
+ /*
+ * We're not deleting all events, so we must resubmit the
+ * event that we are still interested in if one exists.
+ */
+
+ if ((events & POLLIN) && evdp->evwrite != NULL) {
+ /* Deleting read, still care about write */
+ devpoll_queue(devpollop, fd, POLLOUT);
+ needwritedelete = 0;
+ } else if ((events & POLLOUT) && evdp->evread != NULL) {
+ /* Deleting write, still care about read */
+ devpoll_queue(devpollop, fd, POLLIN);
+ needreaddelete = 0;
+ }
+ }
+
+ /* Clear only the registrations that were actually removed. */
+ if (needreaddelete)
+ evdp->evread = NULL;
+ if (needwritedelete)
+ evdp->evwrite = NULL;
+
+ return (0);
+}
+
+/*
+ * Release all resources owned by the /dev/poll backend.
+ * free(NULL) is a no-op per the C standard, so the redundant
+ * `if (ptr) free(ptr)` guards of the original have been dropped.
+ */
+static void
+devpoll_dealloc(struct event_base *base, void *arg)
+{
+	struct devpollop *devpollop = arg;
+
+	evsignal_dealloc(base);
+	free(devpollop->fds);
+	free(devpollop->events);
+	free(devpollop->changes);
+	if (devpollop->dpfd >= 0)	/* fd 0 would be a valid descriptor */
+		close(devpollop->dpfd);
+
+	/* Poison the struct to make accidental reuse easier to catch. */
+	memset(devpollop, 0, sizeof(struct devpollop));
+	free(devpollop);
+}
diff --git a/libevent/epoll.c b/libevent/epoll.c
new file mode 100644
index 00000000000..b479b9c07e9
--- /dev/null
+++ b/libevent/epoll.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/epoll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+/* due to limitations in the epoll interface, we need to keep track of
+ * all file descriptors ourselves.
+ */
+/* Per-fd record of the events registered for read and for write. */
+struct evepoll {
+ struct event *evread;
+ struct event *evwrite;
+};
+
+/* Per-event_base state for the epoll backend. */
+struct epollop {
+ struct evepoll *fds;
+ int nfds;
+ struct epoll_event *events;
+ int nevents;
+ int epfd;
+};
+
+static void *epoll_init (struct event_base *);
+static int epoll_add (void *, struct event *);
+static int epoll_del (void *, struct event *);
+static int epoll_dispatch (struct event_base *, void *, struct timeval *);
+static void epoll_dealloc (struct event_base *, void *);
+
+/* Backend dispatch table picked up by the libevent core. */
+const struct eventop epollops = {
+ "epoll",
+ epoll_init,
+ epoll_add,
+ epoll_del,
+ epoll_dispatch,
+ epoll_dealloc,
+ 1 /* need reinit */
+};
+
+/* Mark a descriptor close-on-exec; warn (but continue) on failure. */
+#ifdef HAVE_SETFD
+#define FD_CLOSEONEXEC(x) do { \
+ if (fcntl(x, F_SETFD, 1) == -1) \
+ event_warn("fcntl(%d, F_SETFD)", x); \
+} while (0)
+#else
+#define FD_CLOSEONEXEC(x)
+#endif
+
+/* Default sizing hint for epoll_create() and the event/fd arrays. */
+#define NEVENT 32000
+
+/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
+ * values bigger than (LONG_MAX - 999ULL)/HZ. HZ in the wild can be
+ * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
+ * largest number of msec we can support here is 2147482. Let's
+ * round that down by 47 seconds.
+ */
+#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
+
+/*
+ * Allocate and initialize epoll backend state for `base`.  Returns
+ * NULL — making the core fall back to another backend — when the
+ * EVENT_NOEPOLL environment variable is set, when epoll_create() is
+ * unavailable (ENOSYS), or on allocation failure.
+ */
+static void *
+epoll_init(struct event_base *base)
+{
+ int epfd, nfiles = NEVENT;
+ struct rlimit rl;
+ struct epollop *epollop;
+
+ /* Disable the epoll backend when this environment variable is set */
+ if (getenv("EVENT_NOEPOLL"))
+ return (NULL);
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
+ rl.rlim_cur != RLIM_INFINITY) {
+ /*
+ * Passing rl.rlim_cur directly has caused problems on
+ * some platforms (e.g. Solaris), so size to one less
+ * than the descriptor limit instead.
+ */
+ nfiles = rl.rlim_cur - 1;
+ }
+
+ /* Initialize the kernel queue */
+
+ if ((epfd = epoll_create(nfiles)) == -1) {
+ if (errno != ENOSYS)
+ event_warn("epoll_create");
+ return (NULL);
+ }
+
+ FD_CLOSEONEXEC(epfd);
+
+ if (!(epollop = calloc(1, sizeof(struct epollop))))
+ return (NULL);
+
+ epollop->epfd = epfd;
+
+ /* Initialize fields */
+ epollop->events = malloc(nfiles * sizeof(struct epoll_event));
+ if (epollop->events == NULL) {
+ free(epollop);
+ return (NULL);
+ }
+ epollop->nevents = nfiles;
+
+ epollop->fds = calloc(nfiles, sizeof(struct evepoll));
+ if (epollop->fds == NULL) {
+ free(epollop->events);
+ free(epollop);
+ return (NULL);
+ }
+ epollop->nfds = nfiles;
+
+ evsignal_init(base);
+
+ return (epollop);
+}
+
+/*
+ * Ensure the evepoll fd table can be indexed by `max`: double nfds
+ * until it exceeds max, realloc the table, and zero the new tail.
+ * Returns 0 on success, -1 if realloc fails (the old table remains
+ * valid in that case).
+ */
+static int
+epoll_recalc(struct event_base *base, void *arg, int max)
+{
+ struct epollop *epollop = arg;
+
+ if (max >= epollop->nfds) {
+ struct evepoll *fds;
+ int nfds;
+
+ nfds = epollop->nfds;
+ while (nfds <= max)
+ nfds <<= 1;
+
+ fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
+ if (fds == NULL) {
+ event_warn("realloc");
+ return (-1);
+ }
+ epollop->fds = fds;
+ memset(fds + epollop->nfds, 0,
+ (nfds - epollop->nfds) * sizeof(struct evepoll));
+ epollop->nfds = nfds;
+ }
+
+ return (0);
+}
+
<test></test>
+/*
+ * Wait for readiness with epoll_wait() and activate the matching
+ * read/write events.  tv == NULL waits indefinitely; finite timeouts
+ * are rounded up to whole milliseconds and clamped to
+ * MAX_EPOLL_TIMEOUT_MSEC.  Returns 0 on success or after an EINTR
+ * (signals are processed instead), -1 on other epoll_wait() errors.
+ */
+static int
+epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event *events = epollop->events;
+ struct evepoll *evep;
+ int i, res, timeout = -1;
+
+ if (tv != NULL)
+ timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
+
+ if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
+ /* Linux kernels can wait forever if the timeout is too big;
+ * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
+ timeout = MAX_EPOLL_TIMEOUT_MSEC;
+ }
+
+ res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
+
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("epoll_wait");
+ return (-1);
+ }
+
+ evsignal_process(base);
+ return (0);
+ } else if (base->sig.evsignal_caught) {
+ evsignal_process(base);
+ }
+
+ event_debug(("%s: epoll_wait reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int what = events[i].events;
+ struct event *evread = NULL, *evwrite = NULL;
+ int fd = events[i].data.fd;
+
+ /* Ignore fds we have no bookkeeping for. */
+ if (fd < 0 || fd >= epollop->nfds)
+ continue;
+ evep = &epollop->fds[fd];
+
+ /* On HUP/ERR wake both directions so their handlers can
+ * observe the error condition. */
+ if (what & (EPOLLHUP|EPOLLERR)) {
+ evread = evep->evread;
+ evwrite = evep->evwrite;
+ } else {
+ if (what & EPOLLIN) {
+ evread = evep->evread;
+ }
+
+ if (what & EPOLLOUT) {
+ evwrite = evep->evwrite;
+ }
+ }
+
+ if (!(evread||evwrite))
+ continue;
+
+ if (evread != NULL)
+ event_active(evread, EV_READ, 1);
+ if (evwrite != NULL)
+ event_active(evwrite, EV_WRITE, 1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register an event with epoll.  Signal events are delegated to
+ * evsignal_add().  Uses EPOLL_CTL_MOD instead of EPOLL_CTL_ADD when
+ * the fd already has an event registered, growing the fd table first
+ * if necessary.  Returns 0 on success, -1 on failure.
+ */
+static int
+epoll_add(void *arg, struct event *ev)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event epev = {0, {0}};
+ struct evepoll *evep;
+ int fd, op, events;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_add(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= epollop->nfds) {
+ /* Extend the file descriptor array as necessary */
+ if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
+ return (-1);
+ }
+ evep = &epollop->fds[fd];
+ op = EPOLL_CTL_ADD;
+ events = 0;
+ if (evep->evread != NULL) {
+ events |= EPOLLIN;
+ op = EPOLL_CTL_MOD;
+ }
+ if (evep->evwrite != NULL) {
+ events |= EPOLLOUT;
+ op = EPOLL_CTL_MOD;
+ }
+
+ if (ev->ev_events & EV_READ)
+ events |= EPOLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= EPOLLOUT;
+
+ epev.data.fd = fd;
+ epev.events = events;
+ if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
+ return (-1);
+
+ /* Update events responsible */
+ if (ev->ev_events & EV_READ)
+ evep->evread = ev;
+ if (ev->ev_events & EV_WRITE)
+ evep->evwrite = ev;
+
+ return (0);
+}
+
+/*
+ * Unregister an event.  Signal events are delegated to evsignal_del().
+ * If the fd still has the other direction registered, downgrade to
+ * EPOLL_CTL_MOD with the remaining event mask instead of
+ * EPOLL_CTL_DEL.  Returns 0 on success (including fds that were never
+ * registered), -1 on epoll_ctl() failure.
+ */
+static int
+epoll_del(void *arg, struct event *ev)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event epev = {0, {0}};
+ struct evepoll *evep;
+ int fd, events, op;
+ int needwritedelete = 1, needreaddelete = 1;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_del(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= epollop->nfds)
+ return (0);
+ evep = &epollop->fds[fd];
+
+ op = EPOLL_CTL_DEL;
+ events = 0;
+
+ if (ev->ev_events & EV_READ)
+ events |= EPOLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= EPOLLOUT;
+
+ /* Deleting only one of two registered directions: keep the other
+ * direction alive via EPOLL_CTL_MOD. */
+ if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
+ if ((events & EPOLLIN) && evep->evwrite != NULL) {
+ needwritedelete = 0;
+ events = EPOLLOUT;
+ op = EPOLL_CTL_MOD;
+ } else if ((events & EPOLLOUT) && evep->evread != NULL) {
+ needreaddelete = 0;
+ events = EPOLLIN;
+ op = EPOLL_CTL_MOD;
+ }
+ }
+
+ epev.events = events;
+ epev.data.fd = fd;
+
+ if (needreaddelete)
+ evep->evread = NULL;
+ if (needwritedelete)
+ evep->evwrite = NULL;
+
+ if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Release all epoll backend state: signal bookkeeping, the fd and
+ * event tables, and the epoll descriptor itself.
+ */
+static void
+epoll_dealloc(struct event_base *base, void *arg)
+{
+ struct epollop *epollop = arg;
+
+ evsignal_dealloc(base);
+ if (epollop->fds)
+ free(epollop->fds);
+ if (epollop->events)
+ free(epollop->events);
+ if (epollop->epfd >= 0)
+ close(epollop->epfd);
+
+ memset(epollop, 0, sizeof(struct epollop));
+ free(epollop);
+}
diff --git a/libevent/epoll_sub.c b/libevent/epoll_sub.c
new file mode 100644
index 00000000000..431970c73a6
--- /dev/null
+++ b/libevent/epoll_sub.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+/* Invoke the epoll_create system call directly via syscall(2). */
+int
+epoll_create(int size)
+{
+ return (syscall(__NR_epoll_create, size));
+}
+
+/* Invoke the epoll_ctl system call directly via syscall(2). */
+int
+epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
+{
+
+ return (syscall(__NR_epoll_ctl, epfd, op, fd, event));
+}
+
+/* Invoke the epoll_wait system call directly via syscall(2). */
+int
+epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
+{
+ return (syscall(__NR_epoll_wait, epfd, events, maxevents, timeout));
+}
diff --git a/libevent/evbuffer.c b/libevent/evbuffer.c
new file mode 100644
index 00000000000..f2179a5044f
--- /dev/null
+++ b/libevent/evbuffer.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2002-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDARG_H
+#include <stdarg.h>
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+
+#include "evutil.h"
+#include "event.h"
+
+/* prototypes */
+
+void bufferevent_read_pressure_cb(struct evbuffer *, size_t, size_t, void *);
+
+/*
+ * (Re)schedule `ev`, applying `timeout` seconds as its timeout; a
+ * timeout of 0 means no timeout at all.  Returns event_add()'s result
+ * (0 on success, -1 on failure).
+ */
+static int
+bufferevent_add(struct event *ev, int timeout)
+{
+ struct timeval tv, *ptv = NULL;
+
+ if (timeout) {
+ evutil_timerclear(&tv);
+ tv.tv_sec = timeout;
+ ptv = &tv;
+ }
+
+ return (event_add(ev, ptv));
+}
+
+/*
+ * This callback is executed when the size of the input buffer changes.
+ * We use it to apply back pressure on the reading side.
+ */
+
+/*
+ * Input-buffer size callback, installed by bufferevent_readcb() when
+ * the read high-watermark is reached.  Once the buffer falls back
+ * below the watermark, uninstall ourselves and resume reading if it
+ * is still enabled.  `arg` is the owning bufferevent.
+ */
+void
+bufferevent_read_pressure_cb(struct evbuffer *buf, size_t old, size_t now,
+ void *arg) {
+ struct bufferevent *bufev = arg;
+ /*
+ * If we are below the watermark then reschedule reading if it's
+ * still enabled.
+ */
+ if (bufev->wm_read.high == 0 || now < bufev->wm_read.high) {
+ evbuffer_setcb(buf, NULL, NULL);
+
+ if (bufev->enabled & EV_READ)
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+ }
+}
+
+/*
+ * Read event handler: pulls data from fd into bufev->input, enforces
+ * the read high-watermark, reschedules itself, and finally invokes
+ * the user read callback — or the error callback on timeout, error,
+ * or EOF.
+ */
+static void
+bufferevent_readcb(int fd, short event, void *arg)
+{
+ struct bufferevent *bufev = arg;
+ int res = 0;
+ short what = EVBUFFER_READ;
+ size_t len;
+ int howmuch = -1;	/* -1: no explicit read limit */
+
+ if (event == EV_TIMEOUT) {
+ what |= EVBUFFER_TIMEOUT;
+ goto error;
+ }
+
+ /*
+ * If we have a high watermark configured then we don't want to
+ * read more data than would make us reach the watermark.
+ */
+ if (bufev->wm_read.high != 0) {
+ howmuch = bufev->wm_read.high - EVBUFFER_LENGTH(bufev->input);
+ /* we might have lowered the watermark, stop reading */
+ if (howmuch <= 0) {
+ struct evbuffer *buf = bufev->input;
+ event_del(&bufev->ev_read);
+ evbuffer_setcb(buf,
+ bufferevent_read_pressure_cb, bufev);
+ return;
+ }
+ }
+
+ res = evbuffer_read(bufev->input, fd, howmuch);
+ if (res == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ goto reschedule;
+ /* error case */
+ what |= EVBUFFER_ERROR;
+ } else if (res == 0) {
+ /* eof case */
+ what |= EVBUFFER_EOF;
+ }
+
+ if (res <= 0)
+ goto error;
+
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+
+ /* See if this callback meets the water marks */
+ len = EVBUFFER_LENGTH(bufev->input);
+ if (bufev->wm_read.low != 0 && len < bufev->wm_read.low)
+ return;
+ if (bufev->wm_read.high != 0 && len >= bufev->wm_read.high) {
+ struct evbuffer *buf = bufev->input;
+ event_del(&bufev->ev_read);
+
+ /* Now schedule a callback for us when the buffer changes */
+ evbuffer_setcb(buf, bufferevent_read_pressure_cb, bufev);
+ }
+
+ /* Invoke the user callback - must always be called last */
+ if (bufev->readcb != NULL)
+ (*bufev->readcb)(bufev, bufev->cbarg);
+ return;
+
+ reschedule:
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+ return;
+
+ error:
+ (*bufev->errorcb)(bufev, what, bufev->cbarg);
+}
+
+/*
+ * Write event handler: flushes bufev->output to fd, reschedules while
+ * data remains, and calls the user write callback once the buffer is
+ * drained to at or below the write low-watermark.  The error callback
+ * is invoked on timeout, error, or EOF.
+ */
+static void
+bufferevent_writecb(int fd, short event, void *arg)
+{
+ struct bufferevent *bufev = arg;
+ int res = 0;
+ short what = EVBUFFER_WRITE;
+
+ if (event == EV_TIMEOUT) {
+ what |= EVBUFFER_TIMEOUT;
+ goto error;
+ }
+
+ if (EVBUFFER_LENGTH(bufev->output)) {
+ res = evbuffer_write(bufev->output, fd);
+ if (res == -1) {
+#ifndef WIN32
+/*todo. evbuffer uses WriteFile when WIN32 is set. WIN32 system calls do not
+ *set errno. thus this error checking is not portable*/
+ if (errno == EAGAIN ||
+ errno == EINTR ||
+ errno == EINPROGRESS)
+ goto reschedule;
+ /* error case */
+ what |= EVBUFFER_ERROR;
+
+#else
+ goto reschedule;
+#endif
+
+ } else if (res == 0) {
+ /* eof case */
+ what |= EVBUFFER_EOF;
+ }
+ if (res <= 0)
+ goto error;
+ }
+
+ if (EVBUFFER_LENGTH(bufev->output) != 0)
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+
+ /*
+ * Invoke the user callback if our buffer is drained or below the
+ * low watermark.
+ */
+ if (bufev->writecb != NULL &&
+ EVBUFFER_LENGTH(bufev->output) <= bufev->wm_write.low)
+ (*bufev->writecb)(bufev, bufev->cbarg);
+
+ return;
+
+ reschedule:
+ if (EVBUFFER_LENGTH(bufev->output) != 0)
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+ return;
+
+ error:
+ (*bufev->errorcb)(bufev, what, bufev->cbarg);
+}
+
+/*
+ * Create a new buffered event object.
+ *
+ * The read callback is invoked whenever we read new data.
+ * The write callback is invoked whenever the output buffer is drained.
+ * The error callback is invoked on a write/read error or on EOF.
+ *
+ * Both read and write callbacks maybe NULL. The error callback is not
+ * allowed to be NULL and have to be provided always.
+ */
+
+/*
+ * Allocate a bufferevent for `fd`.  The error callback must be
+ * non-NULL (the error paths invoke it without a NULL check);
+ * read/write callbacks may be NULL.  Writing starts enabled, reading
+ * must be enabled explicitly.  Returns NULL on allocation failure.
+ */
+struct bufferevent *
+bufferevent_new(int fd, evbuffercb readcb, evbuffercb writecb,
+ everrorcb errorcb, void *cbarg)
+{
+ struct bufferevent *bufev;
+
+ if ((bufev = calloc(1, sizeof(struct bufferevent))) == NULL)
+ return (NULL);
+
+ if ((bufev->input = evbuffer_new()) == NULL) {
+ free(bufev);
+ return (NULL);
+ }
+
+ if ((bufev->output = evbuffer_new()) == NULL) {
+ evbuffer_free(bufev->input);
+ free(bufev);
+ return (NULL);
+ }
+
+ event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev);
+ event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev);
+
+ bufferevent_setcb(bufev, readcb, writecb, errorcb, cbarg);
+
+ /*
+ * Set to EV_WRITE so that using bufferevent_write is going to
+ * trigger a callback. Reading needs to be explicitly enabled
+ * because otherwise no data will be available.
+ */
+ bufev->enabled = EV_WRITE;
+
+ return (bufev);
+}
+
+/* Replace the three user callbacks and the opaque callback argument. */
+void
+bufferevent_setcb(struct bufferevent *bufev,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg)
+{
+ bufev->readcb = readcb;
+ bufev->writecb = writecb;
+ bufev->errorcb = errorcb;
+
+ bufev->cbarg = cbarg;
+}
+
+/*
+ * Re-point the bufferevent at a new fd.  The old events are deleted
+ * and new ones set up on the same base, but NOT re-added — as the
+ * trailing note says, the caller may need to re-trigger registration
+ * (e.g. via bufferevent_enable).
+ */
+void
+bufferevent_setfd(struct bufferevent *bufev, int fd)
+{
+ event_del(&bufev->ev_read);
+ event_del(&bufev->ev_write);
+
+ event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev);
+ event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev);
+ if (bufev->ev_base != NULL) {
+ event_base_set(bufev->ev_base, &bufev->ev_read);
+ event_base_set(bufev->ev_base, &bufev->ev_write);
+ }
+
+ /* might have to manually trigger event registration */
+}
+
+/*
+ * Set the priority of both underlying events.  Returns 0 on success,
+ * -1 if either event_priority_set() call fails.
+ */
+int
+bufferevent_priority_set(struct bufferevent *bufev, int priority)
+{
+ if (event_priority_set(&bufev->ev_read, priority) == -1)
+ return (-1);
+ if (event_priority_set(&bufev->ev_write, priority) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/* Closing the file descriptor is the responsibility of the caller */
+
+/* Tear down both events, free the input/output buffers, and free the
+ * bufferevent itself.  The fd is NOT closed (see comment above). */
+void
+bufferevent_free(struct bufferevent *bufev)
+{
+ event_del(&bufev->ev_read);
+ event_del(&bufev->ev_write);
+
+ evbuffer_free(bufev->input);
+ evbuffer_free(bufev->output);
+
+ free(bufev);
+}
+
+/*
+ * Returns 0 on success;
+ * -1 on failure.
+ */
+
+/*
+ * Append `size` bytes of `data` to the output buffer and, if writing
+ * is enabled, schedule the write event.  Returns 0 on success, -1 on
+ * failure (from evbuffer_add).
+ */
+int
+bufferevent_write(struct bufferevent *bufev, const void *data, size_t size)
+{
+ int res;
+
+ res = evbuffer_add(bufev->output, data, size);
+
+ if (res == -1)
+ return (res);
+
+ /* If everything is okay, we need to schedule a write */
+ if (size > 0 && (bufev->enabled & EV_WRITE))
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+
+ return (res);
+}
+
+/*
+ * Append the contents of `buf` to the output buffer, draining `buf`
+ * on success.  Returns bufferevent_write()'s result.
+ */
+int
+bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf)
+{
+ int res;
+
+ res = bufferevent_write(bufev, buf->buffer, buf->off);
+ if (res != -1)
+ evbuffer_drain(buf, buf->off);
+
+ return (res);
+}
+
+/*
+ * Copy up to `size` bytes from the input buffer into `data`, draining
+ * what was copied.  Returns the number of bytes actually copied.
+ */
+size_t
+bufferevent_read(struct bufferevent *bufev, void *data, size_t size)
+{
+ struct evbuffer *buf = bufev->input;
+
+ if (buf->off < size)
+ size = buf->off;
+
+ /* Copy the available data to the user buffer */
+ memcpy(data, buf->buffer, size);
+
+ if (size)
+ evbuffer_drain(buf, size);
+
+ return (size);
+}
+
+/*
+ * Enable reading and/or writing: schedule the corresponding event(s)
+ * and record them in bufev->enabled.  Returns 0 on success, -1 if
+ * scheduling fails.
+ */
+int
+bufferevent_enable(struct bufferevent *bufev, short event)
+{
+ if (event & EV_READ) {
+ if (bufferevent_add(&bufev->ev_read, bufev->timeout_read) == -1)
+ return (-1);
+ }
+ if (event & EV_WRITE) {
+ if (bufferevent_add(&bufev->ev_write, bufev->timeout_write) == -1)
+ return (-1);
+ }
+
+ bufev->enabled |= event;
+ return (0);
+}
+
+/*
+ * Disable reading and/or writing: delete the corresponding event(s)
+ * and clear them from bufev->enabled.  Returns 0 on success, -1 if
+ * event_del() fails.
+ */
+int
+bufferevent_disable(struct bufferevent *bufev, short event)
+{
+ if (event & EV_READ) {
+ if (event_del(&bufev->ev_read) == -1)
+ return (-1);
+ }
+ if (event & EV_WRITE) {
+ if (event_del(&bufev->ev_write) == -1)
+ return (-1);
+ }
+
+ bufev->enabled &= ~event;
+ return (0);
+}
+
+/*
+ * Sets the read and write timeout for a buffered event.
+ */
+
+/* Store the new read/write timeouts (seconds; 0 disables the timeout,
+ * see bufferevent_add) and re-arm any pending events with them. */
+void
+bufferevent_settimeout(struct bufferevent *bufev,
+ int timeout_read, int timeout_write) {
+ bufev->timeout_read = timeout_read;
+ bufev->timeout_write = timeout_write;
+
+ if (event_pending(&bufev->ev_read, EV_READ, NULL))
+ bufferevent_add(&bufev->ev_read, timeout_read);
+ if (event_pending(&bufev->ev_write, EV_WRITE, NULL))
+ bufferevent_add(&bufev->ev_write, timeout_write);
+}
+
+/*
+ * Sets the water marks
+ */
+
+/*
+ * Set the low/high watermarks for the directions named in `events`,
+ * then re-evaluate read pressure so reading can resume (or stop)
+ * under the new limits.
+ */
+void
+bufferevent_setwatermark(struct bufferevent *bufev, short events,
+ size_t lowmark, size_t highmark)
+{
+ if (events & EV_READ) {
+ bufev->wm_read.low = lowmark;
+ bufev->wm_read.high = highmark;
+ }
+
+ if (events & EV_WRITE) {
+ bufev->wm_write.low = lowmark;
+ bufev->wm_write.high = highmark;
+ }
+
+ /* If the watermarks changed then see if we should call read again */
+ bufferevent_read_pressure_cb(bufev->input,
+ 0, EVBUFFER_LENGTH(bufev->input), bufev);
+}
+
+/*
+ * Attach the bufferevent's read and write events to a specific
+ * event_base.  Returns 0 on success, -1 if either event_base_set()
+ * call fails.
+ */
+int
+bufferevent_base_set(struct event_base *base, struct bufferevent *bufev)
+{
+ int res;
+
+ bufev->ev_base = base;
+
+ res = event_base_set(base, &bufev->ev_read);
+ if (res == -1)
+ return (res);
+
+ res = event_base_set(base, &bufev->ev_write);
+ return (res);
+}
diff --git a/libevent/evdns.3 b/libevent/evdns.3
new file mode 100644
index 00000000000..10414fa2efb
--- /dev/null
+++ b/libevent/evdns.3
@@ -0,0 +1,322 @@
+.\"
+.\" Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd October 7, 2006
+.Dt EVDNS 3
+.Os
+.Sh NAME
+.Nm evdns_init
+.Nm evdns_shutdown
+.Nm evdns_err_to_string
+.Nm evdns_nameserver_add
+.Nm evdns_count_nameservers
+.Nm evdns_clear_nameservers_and_suspend
+.Nm evdns_resume
+.Nm evdns_nameserver_ip_add
+.Nm evdns_resolve_ipv4
+.Nm evdns_resolve_reverse
+.Nm evdns_resolv_conf_parse
+.Nm evdns_config_windows_nameservers
+.Nm evdns_search_clear
+.Nm evdns_search_add
+.Nm evdns_search_ndots_set
+.Nm evdns_set_log_fn
+.Nd asynchronous functions for DNS resolution.
+.Sh SYNOPSIS
+.Fd #include <sys/time.h>
+.Fd #include <event.h>
+.Fd #include <evdns.h>
+.Ft int
+.Fn evdns_init
+.Ft void
+.Fn evdns_shutdown "int fail_requests"
+.Ft "const char *"
+.Fn evdns_err_to_string "int err"
+.Ft int
+.Fn evdns_nameserver_add "unsigned long int address"
+.Ft int
+.Fn evdns_count_nameservers
+.Ft int
+.Fn evdns_clear_nameservers_and_suspend
+.Ft int
+.Fn evdns_resume
+.Ft int
+.Fn evdns_nameserver_ip_add "const char *ip_as_string"
+.Ft int
+.Fn evdns_resolve_ipv4 "const char *name" "int flags" "evdns_callback_type callback" "void *ptr"
+.Ft int
+.Fn evdns_resolve_reverse "struct in_addr *in" "int flags" "evdns_callback_type callback" "void *ptr"
+.Ft int
+.Fn evdns_resolv_conf_parse "int flags" "const char *"
+.Ft void
+.Fn evdns_search_clear
+.Ft void
+.Fn evdns_search_add "const char *domain"
+.Ft void
+.Fn evdns_search_ndots_set "const int ndots"
+.Ft void
+.Fn evdns_set_log_fn "evdns_debug_log_fn_type fn"
+.Ft int
+.Fn evdns_config_windows_nameservers
+.Sh DESCRIPTION
+Welcome, gentle reader
+.Pp
+Async DNS lookups are really a whole lot harder than they should be,
+mostly stemming from the fact that the libc resolver has never been
+very good at them. Before you use this library you should see if libc
+can do the job for you with the modern async call getaddrinfo_a
+(see http://www.imperialviolet.org/page25.html#e498). Otherwise,
+please continue.
+.Pp
+This code is based on libevent and you must call event_init before
+any of the APIs in this file. You must also seed the OpenSSL random
+source if you are using OpenSSL for ids (see below).
+.Pp
+This library is designed to be included and shipped with your source
+code. You statically link with it. You should also test for the
+existence of strtok_r and define HAVE_STRTOK_R if you have it.
+.Pp
+The DNS protocol requires a good source of id numbers and these
+numbers should be unpredictable for spoofing reasons. There are
+three methods for generating them here and you must define exactly
+one of them. In increasing order of preference:
+.Pp
+.Bl -tag -width "DNS_USE_GETTIMEOFDAY_FOR_ID" -compact -offset indent
+.It DNS_USE_GETTIMEOFDAY_FOR_ID
+Using the bottom 16 bits of the usec result from gettimeofday. This
+is a pretty poor solution but should work anywhere.
+.It DNS_USE_CPU_CLOCK_FOR_ID
+Using the bottom 16 bits of the nsec result from the CPU's time
+counter. This is better, but may not work everywhere. Requires
+POSIX realtime support and you'll need to link against -lrt on
+glibc systems at least.
+.It DNS_USE_OPENSSL_FOR_ID
+Uses the OpenSSL RAND_bytes call to generate the data. You must
+have seeded the pool before making any calls to this library.
+.El
+.Pp
+The library keeps track of the state of nameservers and will avoid
+them when they go down. Otherwise it will round robin between them.
+.Pp
+Quick start guide:
+ #include "evdns.h"
+ void callback(int result, char type, int count, int ttl,
+ void *addresses, void *arg);
+ evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+ evdns_resolve("www.hostname.com", 0, callback, NULL);
+.Pp
+When the lookup is complete the callback function is called. The
+first argument will be one of the DNS_ERR_* defines in evdns.h.
+Hopefully it will be DNS_ERR_NONE, in which case type will be
+DNS_IPv4_A, count will be the number of IP addresses, ttl is the time
+which the data can be cached for (in seconds), addresses will point
+to an array of uint32_t's and arg will be whatever you passed to
+evdns_resolve.
+.Pp
+Searching:
+.Pp
+In order for this library to be a good replacement for glibc's resolver it
+supports searching. This involves setting a list of default domains, in
+which names will be queried for. The number of dots in the query name
+determines the order in which this list is used.
+.Pp
+Searching appears to be a single lookup from the point of view of the API,
+although many DNS queries may be generated from a single call to
+evdns_resolve. Searching can also drastically slow down the resolution
+of names.
+.Pp
+To disable searching:
+.Bl -enum -compact -offset indent
+.It
+Never set it up. If you never call
+.Fn evdns_resolv_conf_parse,
+.Fn evdns_init,
+or
+.Fn evdns_search_add
+then no searching will occur.
+.It
+If you do call
+.Fn evdns_resolv_conf_parse
+then don't pass
+.Va DNS_OPTION_SEARCH
+(or
+.Va DNS_OPTIONS_ALL,
+which implies it).
+.It
+When calling
+.Fn evdns_resolve,
+pass the
+.Va DNS_QUERY_NO_SEARCH
+flag.
+.El
+.Pp
+The order of searches depends on the number of dots in the name. If the
+number is greater than the ndots setting then the names is first tried
+globally. Otherwise each search domain is appended in turn.
+.Pp
+The ndots setting can either be set from a resolv.conf, or by calling
+evdns_search_ndots_set.
+.Pp
+For example, with ndots set to 1 (the default) and a search domain list of
+["myhome.net"]:
+ Query: www
+ Order: www.myhome.net, www.
+.Pp
+ Query: www.abc
+ Order: www.abc., www.abc.myhome.net
+.Pp
+.Sh API reference
+.Pp
+.Bl -tag -width 0123456
+.It Ft int Fn evdns_init
+Initializes support for non-blocking name resolution by calling
+.Fn evdns_resolv_conf_parse
+on UNIX and
+.Fn evdns_config_windows_nameservers
+on Windows.
+.It Ft int Fn evdns_nameserver_add "unsigned long int address"
+Add a nameserver. The address should be an IP address in
+network byte order. The type of address is chosen so that
+it matches in_addr.s_addr.
+Returns non-zero on error.
+.It Ft int Fn evdns_nameserver_ip_add "const char *ip_as_string"
+This wraps the above function by parsing a string as an IP
+address and adds it as a nameserver.
+Returns non-zero on error
+.It Ft int Fn evdns_resolve "const char *name" "int flags" "evdns_callback_type callback" "void *ptr"
+Resolve a name. The name parameter should be a DNS name.
+The flags parameter should be 0, or DNS_QUERY_NO_SEARCH
+which disables searching for this query. (see defn of
+searching above).
+.Pp
+The callback argument is a function which is called when
+this query completes and ptr is an argument which is passed
+to that callback function.
+.Pp
+Returns non-zero on error
+.It Ft void Fn evdns_search_clear
+Clears the list of search domains
+.It Ft void Fn evdns_search_add "const char *domain"
+Add a domain to the list of search domains
+.It Ft void Fn evdns_search_ndots_set "int ndots"
+Set the number of dots which, when found in a name, causes
+the first query to be without any search domain.
+.It Ft int Fn evdns_count_nameservers "void"
+Return the number of configured nameservers (not necessarily the
+number of running nameservers). This is useful for double-checking
+whether our calls to the various nameserver configuration functions
+have been successful.
+.It Ft int Fn evdns_clear_nameservers_and_suspend "void"
+Remove all currently configured nameservers, and suspend all pending
+resolves. Resolves will not necessarily be re-attempted until
+evdns_resume() is called.
+.It Ft int Fn evdns_resume "void"
+Re-attempt resolves left in limbo after an earlier call to
+evdns_clear_nameservers_and_suspend().
+.It Ft int Fn evdns_config_windows_nameservers "void"
+Attempt to configure a set of nameservers based on platform settings on
+a win32 host. Preferentially tries to use GetNetworkParams; if that fails,
+looks in the registry. Returns 0 on success, nonzero on failure.
+.It Ft int Fn evdns_resolv_conf_parse "int flags" "const char *filename"
+Parse a resolv.conf like file from the given filename.
+.Pp
+See the man page for resolv.conf for the format of this file.
+The flags argument determines what information is parsed from
+this file:
+.Bl -tag -width "DNS_OPTION_NAMESERVERS" -offset indent -compact -nested
+.It DNS_OPTION_SEARCH
+domain, search and ndots options
+.It DNS_OPTION_NAMESERVERS
+nameserver lines
+.It DNS_OPTION_MISC
+timeout and attempts options
+.It DNS_OPTIONS_ALL
+all of the above
+.El
+.Pp
+The following directives are not parsed from the file:
+ sortlist, rotate, no-check-names, inet6, debug
+.Pp
+Returns non-zero on error:
+.Bl -tag -width "0" -offset indent -compact -nested
+.It 0
+no errors
+.It 1
+failed to open file
+.It 2
+failed to stat file
+.It 3
+file too large
+.It 4
+out of memory
+.It 5
+short read from file
+.El
+.El
+.Sh Internals:
+Requests are kept in two queues. The first is the inflight queue. In
+this queue requests have an allocated transaction id and nameserver.
+They will soon be transmitted if they haven't already been.
+.Pp
+The second is the waiting queue. The size of the inflight ring is
+limited and all other requests wait in waiting queue for space. This
+bounds the number of concurrent requests so that we don't flood the
+nameserver. Several algorithms require a full walk of the inflight
+queue and so bounding its size keeps thing going nicely under huge
+(many thousands of requests) loads.
+.Pp
+If a nameserver loses too many requests it is considered down and we
+try not to use it. After a while we send a probe to that nameserver
+(a lookup for google.com) and, if it replies, we consider it working
+again. If the nameserver fails a probe we wait longer to try again
+with the next probe.
+.Sh SEE ALSO
+.Xr event 3 ,
+.Xr gethostbyname 3 ,
+.Xr resolv.conf 5
+.Sh HISTORY
+The
+.Nm evdns
+API was developed by Adam Langley on top of the
+.Nm libevent
+API.
+The code was integrated into
+.Nm Tor
+by Nick Mathewson and finally put into
+.Nm libevent
+itself by Niels Provos.
+.Sh AUTHORS
+The
+.Nm evdns
+API and code was written by Adam Langley with significant
+contributions by Nick Mathewson.
+.Sh BUGS
+This documentation is neither complete nor authoritative.
+If you are in doubt about the usage of this API then
+check the source code to find out how it works, write
+up the missing piece of documentation and send it to
+me for inclusion in this man page.
diff --git a/libevent/evdns.c b/libevent/evdns.c
new file mode 100644
index 00000000000..e13357f1596
--- /dev/null
+++ b/libevent/evdns.c
@@ -0,0 +1,3200 @@
+/* $Id: evdns.c 6979 2006-08-04 18:31:13Z nickm $ */
+
+/* The original version of this module was written by Adam Langley; for
+ * a history of modifications, check out the subversion logs.
+ *
+ * When editing this module, try to keep it re-mergeable by Adam. Don't
+ * reformat the whitespace, add Tor dependencies, or so on.
+ *
+ * TODO:
+ * - Support IPv6 and PTR records.
+ * - Replace all externally visible magic numbers with #defined constants.
+ * - Write documentation for APIs of all external functions.
+ */
+
+/* Async DNS Library
+ * Adam Langley <agl@imperialviolet.org>
+ * http://www.imperialviolet.org/eventdns.html
+ * Public Domain code
+ *
+ * This software is Public Domain. To view a copy of the public domain dedication,
+ * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
+ * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
+ *
+ * I ask and expect, but do not require, that all derivative works contain an
+ * attribution similar to:
+ * Parts developed by Adam Langley <agl@imperialviolet.org>
+ *
+ * You may wish to replace the word "Parts" with something else depending on
+ * the amount of original code.
+ *
+ * (Derivative works does not include programs which link against, run or include
+ * the source verbatim in their source distributions)
+ *
+ * Version: 0.1b
+ */
+
+#include <sys/types.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef DNS_USE_FTIME_FOR_ID
+#include <sys/timeb.h>
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef HAVE_GETTIMEOFDAY
+#define DNS_USE_GETTIMEOFDAY_FOR_ID 1
+#endif
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef HAVE_GETTIMEOFDAY
+#define DNS_USE_GETTIMEOFDAY_FOR_ID 1
+#endif
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifndef DNS_USE_GETTIMEOFDAY_FOR_ID
+#ifndef DNS_USE_OPENSSL_FOR_ID
+#ifndef DNS_USE_FTIME_FOR_ID
+#error Must configure at least one id generation method.
+#error Please see the documentation.
+#endif
+#endif
+#endif
+#endif
+
+/* #define _POSIX_C_SOURCE 200507 */
+#define _GNU_SOURCE
+
+#ifdef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef DNS_USE_OPENSSL_FOR_ID
+#error Multiple id options selected
+#endif
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+#error Multiple id options selected
+#endif
+#include <time.h>
+#endif
+
+#ifdef DNS_USE_OPENSSL_FOR_ID
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+#error Multiple id options selected
+#endif
+#include <openssl/rand.h>
+#endif
+
+#ifndef _FORTIFY_SOURCE
+#define _FORTIFY_SOURCE 3
+#endif
+
+#include <string.h>
+#include <fcntl.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <limits.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "evdns.h"
+#include "evutil.h"
+#include "log.h"
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#include <iphlpapi.h>
+#include <io.h>
+#else
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif
+
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif
+
+#define EVDNS_LOG_DEBUG 0
+#define EVDNS_LOG_WARN 1
+
+#ifndef HOST_NAME_MAX
+#define HOST_NAME_MAX 255
+#endif
+
+#include <stdio.h>
+
+#undef MIN
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+#ifdef __USE_ISOC99B
+/* libevent doesn't work without this */
+typedef ev_uint8_t u_char;
+typedef unsigned int uint;
+#endif
+#include <event.h>
+
+#define u64 ev_uint64_t
+#define u32 ev_uint32_t
+#define u16 ev_uint16_t
+#define u8 ev_uint8_t
+
+#ifdef WIN32
+#define open _open
+#define read _read
+#define close _close
+#define strdup _strdup
+#endif
+
+#define MAX_ADDRS 32 /* maximum number of addresses from a single packet */
+/* which we bother recording */
+
+#define TYPE_A EVDNS_TYPE_A
+#define TYPE_CNAME 5
+#define TYPE_PTR EVDNS_TYPE_PTR
+#define TYPE_AAAA EVDNS_TYPE_AAAA
+
+#define CLASS_INET EVDNS_CLASS_INET
+
+/* State for one outbound DNS query.  Requests live on one of two */
+/* circular doubly-linked lists (via next/prev): the inflight list */
+/* (req_head) or the waiting list (req_waiting_head). */
+struct request {
+	u8 *request; /* the dns packet data */
+	unsigned int request_len;
+	int reissue_count;
+	int tx_count; /* the number of times that this packet has been sent */
+	unsigned int request_type; /* TYPE_PTR or TYPE_A */
+	void *user_pointer; /* the pointer given to us for this request */
+	evdns_callback_type user_callback;
+	struct nameserver *ns; /* the server which we last sent it */
+
+	/* elements used by the searching code */
+	int search_index;
+	struct search_state *search_state;
+	char *search_origname; /* needs to be free()ed */
+	int search_flags;
+
+	/* these objects are kept in a circular list */
+	struct request *next, *prev;
+
+	struct event timeout_event;
+
+	u16 trans_id; /* the transaction id */
+	char request_appended; /* true if the request pointer is data which follows this struct */
+	char transmit_me; /* needs to be transmitted */
+};
+
+#ifndef HAVE_STRUCT_IN6_ADDR
+/* Fallback definition for platforms whose headers lack struct in6_addr. */
+struct in6_addr {
+	u8 s6_addr[16];
+};
+#endif
+
+/* Parsed answer data extracted from a DNS reply packet. */
+/* Which union member is valid is determined by 'type' */
+/* (TYPE_A, TYPE_AAAA or TYPE_PTR). */
+struct reply {
+	unsigned int type;
+	unsigned int have_answer;
+	union {
+		struct {
+			u32 addrcount;
+			u32 addresses[MAX_ADDRS];
+		} a;
+		struct {
+			u32 addrcount;
+			struct in6_addr addresses[MAX_ADDRS];
+		} aaaa;
+		struct {
+			char name[HOST_NAME_MAX];
+		} ptr;
+	} data;
+};
+
+/* One upstream DNS server.  Nameservers are kept in a circular list */
+/* rooted at server_head; 'state' tracks whether we currently believe */
+/* the server is answering. */
+struct nameserver {
+	int socket; /* a connected UDP socket */
+	u32 address;
+	u16 port;
+	int failed_times; /* number of times which we have given this server a chance */
+	int timedout; /* number of times in a row a request has timed out */
+	struct event event;
+	/* these objects are kept in a circular list */
+	struct nameserver *next, *prev;
+	struct event timeout_event; /* used to keep the timeout for */
+				    /* when we next probe this server. */
+				    /* Valid if state == 0 */
+	char state; /* zero if we think that this server is down */
+	char choked; /* true if we have an EAGAIN from this server's socket */
+	char write_waiting; /* true if we are waiting for EV_WRITE events */
+};
+
+static struct request *req_head = NULL, *req_waiting_head = NULL;
+static struct nameserver *server_head = NULL;
+
+/* Represents a local port where we're listening for DNS requests. Right now, */
+/* only UDP is supported. */
+struct evdns_server_port {
+	int socket; /* socket we use to read queries and write replies. */
+	int refcnt; /* reference count; incremented per in-progress request */
+		    /* (see request_parse); freed via server_port_free(). */
+	char choked; /* Are we currently blocked from writing? */
+	char closing; /* Are we trying to close this port, pending writes? */
+	evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
+	void *user_data; /* Opaque pointer passed to user_callback */
+	struct event event; /* Read/write event */
+	/* circular list of replies that we want to write. */
+	struct server_request *pending_replies;
+};
+
+/* Represents part of a reply being built. (That is, a single RR.) */
+struct server_reply_item {
+	struct server_reply_item *next; /* next item in sequence. */
+	char *name; /* name part of the RR */
+	u16 type : 16; /* The RR type */
+	u16 class : 16; /* The RR class (usually CLASS_INET) */
+	u32 ttl; /* The RR TTL */
+	char is_name; /* True iff data is a label */
+	u16 datalen; /* Length of data; -1 if data is a label */
+		     /* NOTE(review): datalen is u16, so the "-1" in the */
+		     /* comment above cannot literally be stored — confirm */
+		     /* whether is_name alone marks label data. */
+	void *data; /* The contents of the RR */
+};
+
+/* Represents a request that we've received as a DNS server, and holds */
+/* the components of the reply as we're constructing it. */
+struct server_request {
+	/* Pointers to the next and previous entries on the list of replies */
+	/* that we're waiting to write. Only set if we have tried to respond */
+	/* and gotten EAGAIN. */
+	struct server_request *next_pending;
+	struct server_request *prev_pending;
+
+	u16 trans_id; /* Transaction id. */
+	struct evdns_server_port *port; /* Which port received this request on? */
+	struct sockaddr_storage addr; /* Where to send the response */
+	socklen_t addrlen; /* length of addr */
+
+	int n_answer; /* how many answer RRs have been set? */
+	int n_authority; /* how many authority RRs have been set? */
+	int n_additional; /* how many additional RRs have been set? */
+
+	struct server_reply_item *answer; /* linked list of answer RRs */
+	struct server_reply_item *authority; /* linked list of authority RRs */
+	struct server_reply_item *additional; /* linked list of additional RRs */
+
+	/* Constructed response. Only set once we're ready to send a reply. */
+	/* Once this is set, the RR fields are cleared, and no more should be set. */
+	char *response;
+	size_t response_len;
+
+	/* Caller-visible fields: flags, questions. */
+	/* base must stay last-positioned as laid out here: TO_SERVER_REQUEST */
+	/* recovers the server_request from a pointer to base via OFFSET_OF. */
+	struct evdns_server_request base;
+};
+
+/* helper macro */
+#define OFFSET_OF(st, member) ((off_t) (((char*)&((st*)0)->member)-(char*)0))
+
+/* Given a pointer to an evdns_server_request, get the corresponding */
+/* server_request. */
+#define TO_SERVER_REQUEST(base_ptr) \
+ ((struct server_request*) \
+ (((char*)(base_ptr) - OFFSET_OF(struct server_request, base))))
+
+/* The number of good nameservers that we have */
+static int global_good_nameservers = 0;
+
+/* inflight requests are contained in the req_head list */
+/* and are actually going out across the network */
+static int global_requests_inflight = 0;
+/* requests which aren't inflight are in the waiting list */
+/* and are counted here */
+static int global_requests_waiting = 0;
+
+/* cap on simultaneously inflight requests; excess requests sit in the */
+/* waiting queue until evdns_requests_pump_waiting_queue() promotes them */
+static int global_max_requests_inflight = 64;
+
+static struct timeval global_timeout = {5, 0}; /* 5 seconds */
+static int global_max_reissues = 1; /* a reissue occurs when we get some errors from the server */
+static int global_max_retransmits = 3; /* number of times we'll retransmit a request which timed out */
+/* number of timeouts in a row before we consider this server to be down */
+static int global_max_nameserver_timeout = 3;
+
+/* These are the timeout values for nameservers. If we find a nameserver is down */
+/* we try to probe it at intervals as given below. Values are in seconds. */
+static const struct timeval global_nameserver_timeouts[] = {{10, 0}, {60, 0}, {300, 0}, {900, 0}, {3600, 0}};
+static const int global_nameserver_timeouts_length = sizeof(global_nameserver_timeouts)/sizeof(struct timeval);
+
+static struct nameserver *nameserver_pick(void);
+static void evdns_request_insert(struct request *req, struct request **head);
+static void nameserver_ready_callback(int fd, short events, void *arg);
+static int evdns_transmit(void);
+static int evdns_request_transmit(struct request *req);
+static void nameserver_send_probe(struct nameserver *const ns);
+static void search_request_finished(struct request *const);
+static int search_try_next(struct request *const req);
+static int search_request_new(int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
+static void evdns_requests_pump_waiting_queue(void);
+static u16 transaction_id_pick(void);
+static struct request *request_new(int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
+static void request_submit(struct request *const req);
+
+static int server_request_free(struct server_request *req);
+static void server_request_free_answers(struct server_request *req);
+static void server_port_free(struct evdns_server_port *port);
+static void server_port_ready_callback(int fd, short events, void *arg);
+
+static int strtoint(const char *const str);
+
+#ifdef WIN32
+/* Win32: return the most useful error for the last socket operation. */
+/* On WSAEWOULDBLOCK we also query SO_ERROR, preferring any pending */
+/* socket-level error over the generic would-block code. */
+static int
+last_error(int sock)
+{
+	int optval, optvallen=sizeof(optval);
+	int err = WSAGetLastError();
+	if (err == WSAEWOULDBLOCK && sock >= 0) {
+		if (getsockopt(sock, SOL_SOCKET, SO_ERROR, (void*)&optval,
+			       &optvallen))
+			return err;
+		if (optval)
+			return optval;
+	}
+	return err;
+
+}
+/* Return non-zero iff err means "operation would block; retry later". */
+static int
+error_is_eagain(int err)
+{
+	if (err == EAGAIN)
+		return 1;
+	return err == WSAEWOULDBLOCK;
+}
+/* Minimal inet_aton() replacement for platforms that lack it. */
+/* 255.255.255.255 is special-cased because inet_addr() returns */
+/* INADDR_NONE (0xffffffff) both for that address and on error, */
+/* so it cannot distinguish the two.  Returns 1 on success, 0 on */
+/* a malformed address string. */
+static int
+inet_aton(const char *c, struct in_addr *addr)
+{
+	ev_uint32_t r;
+	if (strcmp(c, "255.255.255.255") == 0) {
+		addr->s_addr = 0xffffffffu;
+	} else {
+		r = inet_addr(c);
+		if (r == INADDR_NONE)
+			return 0;
+		addr->s_addr = r;
+	}
+	return 1;
+}
+#else
+#define last_error(sock) (errno)
+#define error_is_eagain(err) ((err) == EAGAIN)
+#endif
+#define CLOSE_SOCKET(s) EVUTIL_CLOSESOCKET(s)
+
+#define ISSPACE(c) isspace((int)(unsigned char)(c))
+#define ISDIGIT(c) isdigit((int)(unsigned char)(c))
+
+/* Format an IPv4 address (network byte order) as a dotted quad for */
+/* log messages.  Returns a pointer to a static buffer: not reentrant; */
+/* the result is overwritten by the next call. */
+static const char *
+debug_ntoa(u32 address)
+{
+	static char buf[32];
+	u32 a = ntohl(address);
+	evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
+	    (int)(u8)((a>>24)&0xff),
+	    (int)(u8)((a>>16)&0xff),
+	    (int)(u8)((a>>8 )&0xff),
+	    (int)(u8)((a	)&0xff));
+	return buf;
+}
+
+/* User-installed log callback; NULL (the default) silences all logging */
+/* (see _evdns_log below). */
+static evdns_debug_log_fn_type evdns_log_fn = NULL;
+
+/* Public API: install (or clear, with NULL) the debug-log callback. */
+void
+evdns_set_log_fn(evdns_debug_log_fn_type fn)
+{
+	evdns_log_fn = fn;
+}
+
+#ifdef __GNUC__
+#define EVDNS_LOG_CHECK __attribute__ ((format(printf, 2, 3)))
+#else
+#define EVDNS_LOG_CHECK
+#endif
+
+/* Internal logging helper: printf-formats into a buffer and hands the */
+/* message to the user's callback.  No-op when no callback is installed. */
+/* NOTE(review): buf is static, so this is not thread-safe — presumably */
+/* evdns is single-threaded; confirm before using from multiple threads. */
+static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
+static void
+_evdns_log(int warn, const char *fmt, ...)
+{
+	va_list args;
+	static char buf[512];
+	if (!evdns_log_fn)
+		return;
+	va_start(args,fmt);
+	evutil_vsnprintf(buf, sizeof(buf), fmt, args);
+	/* belt-and-braces termination in case vsnprintf truncated */
+	buf[sizeof(buf)-1] = '\0';
+	evdns_log_fn(warn, buf);
+	va_end(args);
+}
+
+#define log _evdns_log
+
+/* This walks the list of inflight requests to find the */
+/* one with a matching transaction id. Returns NULL on */
+/* failure */
+/* Walk the circular inflight list looking for the request whose */
+/* transaction id matches trans_id.  Returns NULL when no inflight */
+/* request carries that id. */
+static struct request *
+request_find_from_trans_id(u16 trans_id) {
+	struct request *const start = req_head;
+	struct request *cur = req_head;
+
+	if (!cur)
+		return NULL;
+	do {
+		if (cur->trans_id == trans_id)
+			return cur;
+		cur = cur->next;
+	} while (cur != start);
+
+	return NULL;
+}
+
+/* a libevent callback function which is called when a nameserver */
+/* has gone down and we want to test if it has came back to life yet */
+/* arg is the struct nameserver installed when the timer was added; */
+/* fd/events are unused for a pure timer event. */
+static void
+nameserver_prod_callback(int fd, short events, void *arg) {
+	struct nameserver *const ns = (struct nameserver *) arg;
+	(void)fd;
+	(void)events;
+
+	nameserver_send_probe(ns);
+}
+
+/* a libevent callback which is called when a nameserver probe (to see if */
+/* it has come back to life) times out. We increment the count of failed_times */
+/* and wait longer to send the next probe packet. */
+static void
+nameserver_probe_failed(struct nameserver *const ns) {
+	const struct timeval * timeout;
+	(void) evtimer_del(&ns->timeout_event);
+	if (ns->state == 1) {
+		/* This can happen if the nameserver acts in a way which makes us mark */
+		/* it as bad and then starts sending good replies. */
+		return;
+	}
+
+	/* back off: pick the next entry in the escalating timeout table, */
+	/* clamped to its last (longest) interval */
+	timeout =
+	  &global_nameserver_timeouts[MIN(ns->failed_times,
+					  global_nameserver_timeouts_length - 1)];
+	ns->failed_times++;
+
+	if (evtimer_add(&ns->timeout_event, (struct timeval *) timeout) < 0) {
+	  log(EVDNS_LOG_WARN,
+	      "Error from libevent when adding timer event for %s",
+	      debug_ntoa(ns->address));
+	  /* ???? Do more? */
+	}
+}
+
+/* called when a nameserver has been deemed to have failed. For example, too */
+/* many packets have timed out etc */
+/* Marks the server down, schedules the first probe, and reassigns any */
+/* not-yet-transmitted inflight requests to a different server. */
+static void
+nameserver_failed(struct nameserver *const ns, const char *msg) {
+	struct request *req, *started_at;
+	/* if this nameserver has already been marked as failed */
+	/* then don't do anything */
+	if (!ns->state) return;
+
+	log(EVDNS_LOG_WARN, "Nameserver %s has failed: %s",
+	    debug_ntoa(ns->address), msg);
+	global_good_nameservers--;
+	assert(global_good_nameservers >= 0);
+	if (global_good_nameservers == 0) {
+		log(EVDNS_LOG_WARN, "All nameservers have failed");
+	}
+
+	ns->state = 0;
+	ns->failed_times = 1;
+
+	/* start probing with the shortest interval in the timeout table */
+	if (evtimer_add(&ns->timeout_event, (struct timeval *) &global_nameserver_timeouts[0]) < 0) {
+		log(EVDNS_LOG_WARN,
+		    "Error from libevent when adding timer event for %s",
+		    debug_ntoa(ns->address));
+		/* ???? Do more? */
+	}
+
+	/* walk the list of inflight requests to see if any can be reassigned to */
+	/* a different server. Requests in the waiting queue don't have a */
+	/* nameserver assigned yet */
+
+	/* if we don't have *any* good nameservers then there's no point */
+	/* trying to reassign requests to one */
+	if (!global_good_nameservers) return;
+
+	req = req_head;
+	started_at = req_head;
+	if (req) {
+		do {
+			if (req->tx_count == 0 && req->ns == ns) {
+				/* still waiting to go out, can be moved */
+				/* to another server */
+				req->ns = nameserver_pick();
+			}
+			req = req->next;
+		} while (req != started_at);
+	}
+}
+
+/* Mark a nameserver as usable again: cancel any pending probe timer, */
+/* reset its failure counters and bump the good-nameserver count. */
+/* No-op if the server is already considered up. */
+static void
+nameserver_up(struct nameserver *const ns) {
+	if (ns->state) return;
+	log(EVDNS_LOG_WARN, "Nameserver %s is back up",
+	    debug_ntoa(ns->address));
+	evtimer_del(&ns->timeout_event);
+	ns->state = 1;
+	ns->failed_times = 0;
+	ns->timedout = 0;
+	global_good_nameservers++;
+}
+
+/* Record the transaction id on the request and stamp it into the first */
+/* two bytes of the DNS packet (the header's ID field), network order. */
+static void
+request_trans_id_set(struct request *const req, const u16 trans_id) {
+	req->trans_id = trans_id;
+	*((u16 *) req->request) = htons(trans_id);
+}
+
+/* Called to remove a request from a list and dealloc it. */
+/* head is a pointer to the head of the list it should be */
+/* removed from or NULL if the request isn't in a list. */
+/* Also cancels the timeout, updates the inflight count and */
+/* promotes waiting requests into the freed inflight slot. */
+static void
+request_finished(struct request *const req, struct request **head) {
+	if (head) {
+		if (req->next == req) {
+			/* only item in the list */
+			*head = NULL;
+		} else {
+			req->next->prev = req->prev;
+			req->prev->next = req->next;
+			if (*head == req) *head = req->next;
+		}
+	}
+
+	log(EVDNS_LOG_DEBUG, "Removing timeout for request %lx",
+	    (unsigned long) req);
+	evtimer_del(&req->timeout_event);
+
+	search_request_finished(req);
+	global_requests_inflight--;
+
+	if (!req->request_appended) {
+		/* need to free the request data on it's own */
+		free(req->request);
+	} else {
+		/* the request data is appended onto the header */
+		/* so everything gets free()ed when we: */
+	}
+
+	free(req);
+
+	evdns_requests_pump_waiting_queue();
+}
+
+/* This is called when a server returns a funny error code. */
+/* We try the request again with another server. */
+/* */
+/* return: */
+/*   0 ok */
+/*   1 failed/reissue is pointless */
+static int
+request_reissue(struct request *req) {
+	const struct nameserver *const last_ns = req->ns;
+	/* the last nameserver should have been marked as failing */
+	/* by the caller of this function, therefore pick will try */
+	/* not to return it */
+	req->ns = nameserver_pick();
+	if (req->ns == last_ns) {
+		/* ... but pick did return it */
+		/* not a lot of point in trying again with the */
+		/* same server */
+		return 1;
+	}
+
+	/* reset transmission state so the transmit machinery sends it */
+	/* again from scratch on the newly picked server */
+	req->reissue_count++;
+	req->tx_count = 0;
+	req->transmit_me = 1;
+
+	return 0;
+}
+
+/* this function looks for space on the inflight queue and promotes */
+/* requests from the waiting queue if it can. Each promoted request */
+/* gets a nameserver and a fresh transaction id before transmission. */
+static void
+evdns_requests_pump_waiting_queue(void) {
+	while (global_requests_inflight < global_max_requests_inflight &&
+	    global_requests_waiting) {
+		struct request *req;
+		/* move a request from the waiting queue to the inflight queue */
+		assert(req_waiting_head);
+		if (req_waiting_head->next == req_waiting_head) {
+			/* only one item in the queue */
+			req = req_waiting_head;
+			req_waiting_head = NULL;
+		} else {
+			req = req_waiting_head;
+			req->next->prev = req->prev;
+			req->prev->next = req->next;
+			req_waiting_head = req->next;
+		}
+
+		global_requests_waiting--;
+		global_requests_inflight++;
+
+		req->ns = nameserver_pick();
+		request_trans_id_set(req, transaction_id_pick());
+
+		evdns_request_insert(req, &req_head);
+		evdns_request_transmit(req);
+		evdns_transmit();
+	}
+}
+
+/* Deliver the outcome of a request to its user callback. */
+/* When reply is non-NULL the lookup succeeded and the answer data */
+/* for the request's type is passed up with its TTL; when reply is */
+/* NULL, err carries the DNS_ERR_* failure code instead. */
+static void
+reply_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply) {
+	switch (req->request_type) {
+	case TYPE_A:
+		if (reply)
+			req->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
+			    reply->data.a.addrcount, ttl,
+			    reply->data.a.addresses,
+			    req->user_pointer);
+		else
+			req->user_callback(err, 0, 0, 0, NULL, req->user_pointer);
+		return;
+	case TYPE_PTR:
+		if (reply) {
+			char *name = reply->data.ptr.name;
+			req->user_callback(DNS_ERR_NONE, DNS_PTR, 1, ttl,
+			    &name, req->user_pointer);
+		} else {
+			req->user_callback(err, 0, 0, 0, NULL,
+			    req->user_pointer);
+		}
+		return;
+	case TYPE_AAAA:
+		if (reply)
+			req->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
+			    reply->data.aaaa.addrcount, ttl,
+			    reply->data.aaaa.addresses,
+			    req->user_pointer);
+		else
+			req->user_callback(err, 0, 0, 0, NULL, req->user_pointer);
+		return;
+	}
+	/* request_type is always one of the three handled above */
+	assert(0);
+}
+
+/* this processes a parsed reply packet: classifies errors from the */
+/* DNS header flags, decides whether to reissue / let time out / fail, */
+/* and otherwise hands the successful answer to the user callback. */
+static void
+reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
+	int error;
+	/* maps rcode-1 (1..5) to our error constants */
+	static const int error_codes[] = {
+		DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
+		DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
+	};
+
+	/* 0x020f = TC (truncated) bit plus the 4-bit rcode */
+	if (flags & 0x020f || !reply || !reply->have_answer) {
+		/* there was an error */
+		if (flags & 0x0200) {
+			error = DNS_ERR_TRUNCATED;
+		} else {
+			/* rcode 0 wraps to 0xffff here, yielding DNS_ERR_UNKNOWN */
+			u16 error_code = (flags & 0x000f) - 1;
+			if (error_code > 4) {
+				error = DNS_ERR_UNKNOWN;
+			} else {
+				error = error_codes[error_code];
+			}
+		}
+
+		switch(error) {
+		case DNS_ERR_NOTIMPL:
+		case DNS_ERR_REFUSED:
+			/* we regard these errors as marking a bad nameserver */
+			if (req->reissue_count < global_max_reissues) {
+				char msg[64];
+				evutil_snprintf(msg, sizeof(msg),
+				    "Bad response %d (%s)",
+					 error, evdns_err_to_string(error));
+				nameserver_failed(req->ns, msg);
+				if (!request_reissue(req)) return;
+			}
+			break;
+		case DNS_ERR_SERVERFAILED:
+			/* rcode 2 (servfailed) sometimes means "we
+			 * are broken" and sometimes (with some binds)
+			 * means "that request was very confusing."
+			 * Treat this as a timeout, not a failure.
+			 */
+			log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver %s; "
+				"will allow the request to time out.",
+			    debug_ntoa(req->ns->address));
+			break;
+		default:
+			/* we got a good reply from the nameserver */
+			nameserver_up(req->ns);
+		}
+
+		if (req->search_state && req->request_type != TYPE_PTR) {
+			/* if we have a list of domains to search in,
+			 * try the next one */
+			if (!search_try_next(req)) {
+				/* a new request was issued so this
+				 * request is finished and */
+				/* the user callback will be made when
+				 * that request (or a */
+				/* child of it) finishes. */
+				request_finished(req, &req_head);
+				return;
+			}
+		}
+
+		/* all else failed. Pass the failure up */
+		reply_callback(req, 0, error, NULL);
+		request_finished(req, &req_head);
+	} else {
+		/* all ok, tell the user */
+		reply_callback(req, ttl, 0, reply);
+		nameserver_up(req->ns);
+		request_finished(req, &req_head);
+	}
+}
+
+/* Decode a (possibly compressed) DNS name starting at packet[*idx] */
+/* into name_out as a dotted string.  Follows RFC 1035 pointer */
+/* compression with a loop guard (ptr_count).  On success returns 0 */
+/* and advances *idx past the name; returns -1 on any malformed or */
+/* out-of-bounds input.  The GET32/GET16/GET8 macros defined here are */
+/* shared with reply_parse/request_parse and #undef'd at the end of */
+/* request_parse. */
+static int
+name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
+	int name_end = -1;
+	int j = *idx;
+	int ptr_count = 0;
+#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while(0)
+#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while(0)
+#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while(0)
+
+	char *cp = name_out;
+	const char *const end = name_out + name_out_len;
+
+	/* Normally, names are a series of length prefixed strings terminated */
+	/* with a length of 0 (the lengths are u8's < 63). */
+	/* However, the length can start with a pair of 1 bits and that */
+	/* means that the next 14 bits are a pointer within the current */
+	/* packet. */
+
+	for(;;) {
+		u8 label_len;
+		if (j >= length) return -1;
+		GET8(label_len);
+		if (!label_len) break;
+		if (label_len & 0xc0) {
+			u8 ptr_low;
+			GET8(ptr_low);
+			/* remember where the name ends in the packet; after a */
+			/* pointer jump, j no longer tracks the caller's cursor */
+			if (name_end < 0) name_end = j;
+			j = (((int)label_len & 0x3f) << 8) + ptr_low;
+			/* Make sure that the target offset is in-bounds. */
+			if (j < 0 || j >= length) return -1;
+			/* If we've jumped more times than there are characters in the
+			 * message, we must have a loop. */
+			if (++ptr_count > length) return -1;
+			continue;
+		}
+		if (label_len > 63) return -1;
+		if (cp != name_out) {
+			if (cp + 1 >= end) return -1;
+			*cp++ = '.';
+		}
+		if (cp + label_len >= end) return -1;
+		memcpy(cp, packet + j, label_len);
+		cp += label_len;
+		j += label_len;
+	}
+	if (cp >= end) return -1;
+	*cp = '\0';
+	if (name_end < 0)
+		*idx = j;
+	else
+		*idx = name_end;
+	return 0;
+ err:
+	return -1;
+}
+
+/* parses a raw reply packet from a nameserver: matches it to the */
+/* inflight request by transaction id, validates the question section, */
+/* collects A/AAAA/PTR answer records into a struct reply, and hands */
+/* the result to reply_handle.  Returns 0 on success, -1 otherwise. */
+static int
+reply_parse(u8 *packet, int length) {
+	int j = 0, k = 0;  /* index into packet */
+	u16 _t;	 /* used by the macros */
+	u32 _t32;  /* used by the macros */
+	char tmp_name[256], cmp_name[256]; /* used by the macros */
+
+	u16 trans_id, questions, answers, authority, additional, datalength;
+	u16 flags = 0;
+	u32 ttl, ttl_r = 0xffffffff;
+	struct reply reply;
+	struct request *req = NULL;
+	unsigned int i;
+
+	GET16(trans_id);
+	GET16(flags);
+	GET16(questions);
+	GET16(answers);
+	GET16(authority);
+	GET16(additional);
+	(void) authority; /* suppress "unused variable" warnings. */
+	(void) additional; /* suppress "unused variable" warnings. */
+
+	req = request_find_from_trans_id(trans_id);
+	if (!req) return -1;
+
+	memset(&reply, 0, sizeof(reply));
+
+	/* If it's not an answer, it doesn't correspond to any request. */
+	if (!(flags & 0x8000)) return -1;  /* must be an answer */
+	if (flags & 0x020f) {
+		/* there was an error */
+		goto err;
+	}
+	/* if (!answers) return; */  /* must have an answer of some form */
+
+	/* This macro skips a name in the DNS reply. */
+#define SKIP_NAME \
+	do { tmp_name[0] = '\0';	\
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)\
+			goto err;				\
+	} while(0)
+	/* This macro checks the question name against the one we sent. */
+	/* NOTE(review): the memcmp below compares only strlen(tmp_name) */
+	/* bytes, so a reply name that is a strict prefix of the request */
+	/* name would pass — confirm whether a full comparison is wanted. */
+#define TEST_NAME \
+	do { tmp_name[0] = '\0';	\
+		cmp_name[0] = '\0';	\
+		k = j;			\
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)\
+			goto err;					\
+		if (name_parse(req->request, req->request_len, &k, cmp_name, sizeof(cmp_name))<0)	\
+			goto err;				\
+		if (memcmp(tmp_name, cmp_name, strlen (tmp_name)) != 0)	\
+			return (-1); /* we ignore mismatching names */	\
+	} while(0)
+
+	reply.type = req->request_type;
+
+	/* skip over each question in the reply */
+	for (i = 0; i < questions; ++i) {
+		/* the question looks like
+		 *   <label:name><u16:type><u16:class>
+		 */
+		TEST_NAME;
+		j += 4;
+		if (j > length) goto err;
+	}
+
+	/* now we have the answer section which looks like
+	 * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
+	 */
+
+	for (i = 0; i < answers; ++i) {
+		u16 type, class;
+
+		SKIP_NAME;
+		GET16(type);
+		GET16(class);
+		GET32(ttl);
+		GET16(datalength);
+
+		if (type == TYPE_A && class == CLASS_INET) {
+			int addrcount, addrtocopy;
+			if (req->request_type != TYPE_A) {
+				j += datalength; continue;
+			}
+			if ((datalength & 3) != 0) /* not an even number of As. */
+			    goto err;
+			addrcount = datalength >> 2;
+			addrtocopy = MIN(MAX_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
+
+			/* report the smallest TTL across all collected records */
+			ttl_r = MIN(ttl_r, ttl);
+			/* we only bother with the first four addresses. */
+			if (j + 4*addrtocopy > length) goto err;
+			memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
+				   packet + j, 4*addrtocopy);
+			j += 4*addrtocopy;
+			reply.data.a.addrcount += addrtocopy;
+			reply.have_answer = 1;
+			if (reply.data.a.addrcount == MAX_ADDRS) break;
+		} else if (type == TYPE_PTR && class == CLASS_INET) {
+			if (req->request_type != TYPE_PTR) {
+				j += datalength; continue;
+			}
+			if (name_parse(packet, length, &j, reply.data.ptr.name,
+						   sizeof(reply.data.ptr.name))<0)
+				goto err;
+			ttl_r = MIN(ttl_r, ttl);
+			reply.have_answer = 1;
+			break;
+		} else if (type == TYPE_AAAA && class == CLASS_INET) {
+			int addrcount, addrtocopy;
+			if (req->request_type != TYPE_AAAA) {
+				j += datalength; continue;
+			}
+			if ((datalength & 15) != 0) /* not an even number of AAAAs. */
+				goto err;
+			addrcount = datalength >> 4;  /* each address is 16 bytes long */
+			addrtocopy = MIN(MAX_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
+			ttl_r = MIN(ttl_r, ttl);
+
+			/* we only bother with the first four addresses. */
+			if (j + 16*addrtocopy > length) goto err;
+			memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
+				   packet + j, 16*addrtocopy);
+			reply.data.aaaa.addrcount += addrtocopy;
+			j += 16*addrtocopy;
+			reply.have_answer = 1;
+			if (reply.data.aaaa.addrcount == MAX_ADDRS) break;
+		} else {
+			/* skip over any other type of resource */
+			j += datalength;
+		}
+	}
+
+	reply_handle(req, flags, ttl_r, &reply);
+	return 0;
+ err:
+	/* report the failure to the matched request, if we found one */
+	if (req)
+		reply_handle(req, flags, 0, NULL);
+	return -1;
+}
+
+/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
+/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
+/* callback.  Allocates a server_request (freed via the port machinery once */
+/* the reply is sent) and one evdns_server_question per question. */
+static int
+request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, socklen_t addrlen)
+{
+	int j = 0;	/* index into packet */
+	u16 _t;	 /* used by the macros */
+	char tmp_name[256]; /* used by the macros */
+
+	int i;
+	u16 trans_id, flags, questions, answers, authority, additional;
+	struct server_request *server_req = NULL;
+
+	/* Get the header fields */
+	GET16(trans_id);
+	GET16(flags);
+	GET16(questions);
+	GET16(answers);
+	GET16(authority);
+	GET16(additional);
+
+	if (flags & 0x8000) return -1; /* Must not be an answer. */
+	flags &= 0x0110; /* Only RD and CD get preserved. */
+
+	server_req = malloc(sizeof(struct server_request));
+	if (server_req == NULL) return -1;
+	memset(server_req, 0, sizeof(struct server_request));
+
+	server_req->trans_id = trans_id;
+	memcpy(&server_req->addr, addr, addrlen);
+	server_req->addrlen = addrlen;
+
+	server_req->base.flags = flags;
+	server_req->base.nquestions = 0;
+	server_req->base.questions = malloc(sizeof(struct evdns_server_question *) * questions);
+	if (server_req->base.questions == NULL)
+		goto err;
+
+	for (i = 0; i < questions; ++i) {
+		u16 type, class;
+		struct evdns_server_question *q;
+		int namelen;
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
+			goto err;
+		GET16(type);
+		GET16(class);
+		namelen = strlen(tmp_name);
+		/* evdns_server_question has a flexible name buffer at its end */
+		q = malloc(sizeof(struct evdns_server_question) + namelen);
+		if (!q)
+			goto err;
+		q->type = type;
+		q->dns_question_class = class;
+		memcpy(q->name, tmp_name, namelen+1);
+		server_req->base.questions[server_req->base.nquestions++] = q;
+	}
+
+	/* Ignore answers, authority, and additional. */
+
+	server_req->port = port;
+	port->refcnt++;
+
+	/* Only standard queries are supported. */
+	/* NOTE(review): this path relies on evdns_server_request_respond */
+	/* (defined elsewhere) to release server_req and the port ref — */
+	/* confirm it frees on this error reply. */
+	if (flags & 0x7800) {
+		evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
+		return -1;
+	}
+
+	port->user_callback(&(server_req->base), port->user_data);
+
+	return 0;
+err:
+	if (server_req) {
+		if (server_req->base.questions) {
+			for (i = 0; i < server_req->base.nquestions; ++i)
+				free(server_req->base.questions[i]);
+			free(server_req->base.questions);
+		}
+		free(server_req);
+	}
+	return -1;
+
+#undef SKIP_NAME
+#undef GET32
+#undef GET16
+#undef GET8
+}
+
+/* Default source of DNS transaction ids.  Exactly one of the */
+/* DNS_USE_*_FOR_ID configuration macros selects the entropy source */
+/* (enforced by the #error checks at the top of this file): CPU clock */
+/* nanoseconds, _ftime milliseconds, gettimeofday microseconds, or */
+/* OpenSSL's PRNG. */
+static u16
+default_transaction_id_fn(void)
+{
+	u16 trans_id;
+#ifdef DNS_USE_CPU_CLOCK_FOR_ID
+	struct timespec ts;
+	static int clkid = -1;
+	if (clkid == -1) {
+		clkid = CLOCK_REALTIME;
+#ifdef CLOCK_MONOTONIC
+		/* prefer the monotonic clock when the platform supports it */
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) != -1)
+			clkid = CLOCK_MONOTONIC;
+#endif
+	}
+	if (clock_gettime(clkid, &ts) == -1)
+		event_err(1, "clock_gettime");
+	trans_id = ts.tv_nsec & 0xffff;
+#endif
+
+#ifdef DNS_USE_FTIME_FOR_ID
+	struct _timeb tb;
+	_ftime(&tb);
+	trans_id = tb.millitm & 0xffff;
+#endif
+
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+	struct timeval tv;
+	evutil_gettimeofday(&tv, NULL);
+	trans_id = tv.tv_usec & 0xffff;
+#endif
+
+#ifdef DNS_USE_OPENSSL_FOR_ID
+	if (RAND_pseudo_bytes((u8 *) &trans_id, 2) == -1) {
+		/* in the case that the RAND call fails we back */
+		/* down to using gettimeofday. */
+		/*
+		  struct timeval tv;
+		  evutil_gettimeofday(&tv, NULL);
+		  trans_id = tv.tv_usec & 0xffff;
+		*/
+		abort();
+	}
+#endif
+	return trans_id;
+}
+
+/* Currently installed transaction-id generator. */
+static ev_uint16_t (*trans_id_function)(void) = default_transaction_id_fn;
+
+/* Public API: install a custom transaction-id generator, or restore */
+/* the built-in default when fn is NULL. */
+void
+evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
+{
+	if (fn)
+		trans_id_function = fn;
+	else
+		trans_id_function = default_transaction_id_fn;
+}
+
+/* Try to choose a strong transaction id which isn't already in flight */
+/* Loops until the generator yields an id that no inflight request is */
+/* using; 0xffff is rejected unconditionally. */
+static u16
+transaction_id_pick(void) {
+	for (;;) {
+		const struct request *req = req_head, *started_at;
+		u16 trans_id = trans_id_function();
+
+		if (trans_id == 0xffff) continue;
+		/* now check to see if that id is already inflight */
+		req = started_at = req_head;
+		if (req) {
+			do {
+				if (req->trans_id == trans_id) break;
+				req = req->next;
+			} while (req != started_at);
+		}
+		/* we didn't find it, so this is a good id */
+		if (req == started_at) return trans_id;
+	}
+}
+
/* choose a namesever to use. This function will try to ignore */
/* nameservers which we think are down and load balance across the rest */
/* by updating the server_head global each time. */
static struct nameserver *
nameserver_pick(void) {
	struct nameserver *started_at = server_head, *picked;
	if (!server_head) return NULL;

	/* if we don't have any good nameservers then there's no */
	/* point in trying to find one. */
	if (!global_good_nameservers) {
		/* still rotate the head so repeated calls spread the load */
		server_head = server_head->next;
		return server_head;
	}

	/* remember that nameservers are in a circular list */
	for (;;) {
		if (server_head->state) {
			/* we think this server is currently good */
			picked = server_head;
			server_head = server_head->next;
			return picked;
		}

		server_head = server_head->next;
		if (server_head == started_at) {
			/* all the nameservers seem to be down */
			/* so we just return this one and hope for the */
			/* best */
			assert(global_good_nameservers == 0);
			picked = server_head;
			server_head = server_head->next;
			return picked;
		}
	}
}
+
+static int
+address_is_correct(struct nameserver *ns, struct sockaddr *sa, socklen_t slen)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in*) sa;
+ if (sa->sa_family != AF_INET || slen != sizeof(struct sockaddr_in))
+ return 0;
+ if (sin->sin_addr.s_addr != ns->address)
+ return 0;
+ return 1;
+}
+
+/* this is called when a namesever socket is ready for reading */
+static void
+nameserver_read(struct nameserver *ns) {
+ u8 packet[1500];
+ struct sockaddr_storage ss;
+ socklen_t addrlen = sizeof(ss);
+
+ for (;;) {
+ const int r = recvfrom(ns->socket, packet, sizeof(packet), 0,
+ (struct sockaddr*)&ss, &addrlen);
+ if (r < 0) {
+ int err = last_error(ns->socket);
+ if (error_is_eagain(err)) return;
+ nameserver_failed(ns, strerror(err));
+ return;
+ }
+ if (!address_is_correct(ns, (struct sockaddr*)&ss, addrlen)) {
+ log(EVDNS_LOG_WARN, "Address mismatch on received "
+ "DNS packet.");
+ return;
+ }
+ ns->timedout = 0;
+ reply_parse(packet, r);
+ }
+}
+
/* Read a packet from a DNS client on a server port s, parse it, and */
/* act accordingly. */
static void
server_port_read(struct evdns_server_port *s) {
	u8 packet[1500];
	struct sockaddr_storage addr;
	socklen_t addrlen;
	int r;

	/* drain the socket until EAGAIN or error */
	for (;;) {
		/* addrlen is value-result; reset before each recvfrom */
		addrlen = sizeof(struct sockaddr_storage);
		r = recvfrom(s->socket, packet, sizeof(packet), 0,
		    (struct sockaddr*) &addr, &addrlen);
		if (r < 0) {
			int err = last_error(s->socket);
			if (error_is_eagain(err)) return;
			log(EVDNS_LOG_WARN, "Error %s (%d) while reading request.",
				strerror(err), err);
			return;
		}
		/* hand the raw query plus the client's address to the parser */
		request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
	}
}
+
/* Try to write all pending replies on a given DNS server port. */
static void
server_port_flush(struct evdns_server_port *port)
{
	while (port->pending_replies) {
		struct server_request *req = port->pending_replies;
		int r = sendto(port->socket, req->response, req->response_len, 0,
			   (struct sockaddr*) &req->addr, req->addrlen);
		if (r < 0) {
			int err = last_error(port->socket);
			if (error_is_eagain(err))
				return;	/* socket is full again; wait for the next EV_WRITE */
			/* any other error: log, drop this reply, keep going */
			log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", strerror(err), err);
		}
		/* freeing the request also advances port->pending_replies */
		if (server_request_free(req)) {
			/* we released the last reference to req->port. */
			return;
		}
	}

	/* We have no more pending requests; stop listening for 'writeable' events. */
	(void) event_del(&port->event);
	event_set(&port->event, port->socket, EV_READ | EV_PERSIST,
			  server_port_ready_callback, port);
	if (event_add(&port->event, NULL) < 0) {
		log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
		/* ???? Do more? */
	}
}
+
+/* set if we are waiting for the ability to write to this server. */
+/* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
+/* we stop these events. */
+static void
+nameserver_write_waiting(struct nameserver *ns, char waiting) {
+ if (ns->write_waiting == waiting) return;
+
+ ns->write_waiting = waiting;
+ (void) event_del(&ns->event);
+ event_set(&ns->event, ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
+ nameserver_ready_callback, ns);
+ if (event_add(&ns->event, NULL) < 0) {
+ log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
+ debug_ntoa(ns->address));
+ /* ???? Do more? */
+ }
+}
+
+/* a callback function. Called by libevent when the kernel says that */
+/* a nameserver socket is ready for writing or reading */
+static void
+nameserver_ready_callback(int fd, short events, void *arg) {
+ struct nameserver *ns = (struct nameserver *) arg;
+ (void)fd;
+
+ if (events & EV_WRITE) {
+ ns->choked = 0;
+ if (!evdns_transmit()) {
+ nameserver_write_waiting(ns, 0);
+ }
+ }
+ if (events & EV_READ) {
+ nameserver_read(ns);
+ }
+}
+
+/* a callback function. Called by libevent when the kernel says that */
+/* a server socket is ready for writing or reading. */
+static void
+server_port_ready_callback(int fd, short events, void *arg) {
+ struct evdns_server_port *port = (struct evdns_server_port *) arg;
+ (void) fd;
+
+ if (events & EV_WRITE) {
+ port->choked = 0;
+ server_port_flush(port);
+ }
+ if (events & EV_READ) {
+ server_port_read(port);
+ }
+}
+
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that is can be safely replaced with something smarter later. */
#define MAX_LABELS 128
/* Structures used to implement name compression */
struct dnslabel_entry { char *v; off_t pos; };	/* strdup()ed name -> offset in message */
struct dnslabel_table {
	int n_labels; /* number of current entries */
	/* map from name to position in message */
	struct dnslabel_entry labels[MAX_LABELS];
};
+
/* Initialize dnslabel_table to the empty state (frees nothing). */
static void
dnslabel_table_init(struct dnslabel_table *table)
{
	table->n_labels = 0;
}
+
+/* Free all storage held by table, but not the table itself. */
+static void
+dnslabel_clear(struct dnslabel_table *table)
+{
+ int i;
+ for (i = 0; i < table->n_labels; ++i)
+ free(table->labels[i].v);
+ table->n_labels = 0;
+}
+
+/* return the position of the label in the current message, or -1 if the label */
+/* hasn't been used yet. */
+static int
+dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
+{
+ int i;
+ for (i = 0; i < table->n_labels; ++i) {
+ if (!strcmp(label, table->labels[i].v))
+ return table->labels[i].pos;
+ }
+ return -1;
+}
+
+/* remember that we've used the label at position pos */
+static int
+dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
+{
+ char *v;
+ int p;
+ if (table->n_labels == MAX_LABELS)
+ return (-1);
+ v = strdup(label);
+ if (v == NULL)
+ return (-1);
+ p = table->n_labels++;
+ table->labels[p].v = v;
+ table->labels[p].pos = pos;
+
+ return (0);
+}
+
/* Converts a string to a length-prefixed set of DNS labels, starting */
/* at buf[j]. name and buf must not overlap. name_len should be the length */
/* of name. table is optional, and is used for compression. */
/* */
/* Input: abc.def */
/* Output: <3>abc<3>def<0> */
/* */
/* Returns the first index after the encoded name, or negative on error. */
/*	 -1 label was > 63 bytes */
/*	 -2 name too long to fit in buffer. */
/* */
static off_t
dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
				  const char *name, const int name_len,
				  struct dnslabel_table *table) {
	const char *end = name + name_len;
	int ref = 0;
	u16 _t;

/* NOTE: these macros deliberately remain defined past the end of this
 * function (no #undef until after the server-request code) and are
 * reused by evdns_request_data_build and
 * evdns_server_request_format_response.  APPEND32 expands to use a
 * local `_t32`, which must be declared by the function that uses it. */
#define APPEND16(x) do {					   \
		if (j + 2 > (off_t)buf_len)			   \
			goto overflow;				   \
		_t = htons(x);					   \
		memcpy(buf + j, &_t, 2);			   \
		j += 2;						   \
	} while (0)
#define APPEND32(x) do {					   \
		if (j + 4 > (off_t)buf_len)			   \
			goto overflow;				   \
		_t32 = htonl(x);				   \
		memcpy(buf + j, &_t32, 4);			   \
		j += 4;						   \
	} while (0)

	/* DNS names are limited to 255 octets */
	if (name_len > 255) return -2;

	for (;;) {
		const char *const start = name;
		/* if this suffix was already emitted, write a two-byte
		 * compression pointer (top bits 11) to its prior position */
		if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
			APPEND16(ref | 0xc000);
			return j;
		}
		name = strchr(name, '.');
		if (!name) {
			/* final label: <len><bytes> */
			const unsigned int label_len = end - start;
			if (label_len > 63) return -1;
			if ((size_t)(j+label_len+1) > buf_len) return -2;
			if (table) dnslabel_table_add(table, start, j);
			buf[j++] = label_len;

			memcpy(buf + j, start, end - start);
			j += end - start;
			break;
		} else {
			/* append length of the label. */
			const unsigned int label_len = name - start;
			if (label_len > 63) return -1;
			if ((size_t)(j+label_len+1) > buf_len) return -2;
			if (table) dnslabel_table_add(table, start, j);
			buf[j++] = label_len;

			memcpy(buf + j, start, name - start);
			j += name - start;
			/* hop over the '.' */
			name++;
		}
	}

	/* the labels must be terminated by a 0. */
	/* It's possible that the name ended in a . */
	/* in which case the zero is already there */
	if (!j || buf[j-1]) buf[j++] = 0;
	return j;
 overflow:
	return (-2);
}
+
/* Upper bound on the size of a DNS request packet carrying a name of
 * name_len bytes; the packet actually built may be smaller. */
static int
evdns_request_len(const int name_len) {
	const int header_len = 96;	/* length of the DNS standard header */
	const int encoded_name = name_len + 2;	/* length bytes + terminator */
	const int qtype_qclass = 4;	/* space for the resource type */
	return header_len + encoded_name + qtype_qclass;
}
+
/* build a dns request packet into buf. buf should be at least as long */
/* as evdns_request_len told you it should be. */
/* */
/* Returns the amount of space used. Negative on error. */
static int
evdns_request_data_build(const char *const name, const int name_len,
    const u16 trans_id, const u16 type, const u16 class,
    u8 *const buf, size_t buf_len) {
	off_t j = 0;  /* current offset into buf */
	u16 _t;	 /* used by the macros */

	/* 12-byte header (APPEND16 is defined in dnsname_to_labels above) */
	APPEND16(trans_id);
	APPEND16(0x0100); /* standard query, recusion needed */
	APPEND16(1); /* one question */
	APPEND16(0); /* no answers */
	APPEND16(0); /* no authority */
	APPEND16(0); /* no additional */

	/* question name, uncompressed (no label table) */
	j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
	if (j < 0) {
		return (int)j;
	}

	APPEND16(type);
	APPEND16(class);

	return (int)j;
 overflow:
	return (-1);
}
+
+/* exported function */
+struct evdns_server_port *
+evdns_add_server_port(int socket, int is_tcp, evdns_request_callback_fn_type cb, void *user_data)
+{
+ struct evdns_server_port *port;
+ if (!(port = malloc(sizeof(struct evdns_server_port))))
+ return NULL;
+ memset(port, 0, sizeof(struct evdns_server_port));
+
+ assert(!is_tcp); /* TCP sockets not yet implemented */
+ port->socket = socket;
+ port->refcnt = 1;
+ port->choked = 0;
+ port->closing = 0;
+ port->user_callback = cb;
+ port->user_data = user_data;
+ port->pending_replies = NULL;
+
+ event_set(&port->event, port->socket, EV_READ | EV_PERSIST,
+ server_port_ready_callback, port);
+ event_add(&port->event, NULL); /* check return. */
+ return port;
+}
+
+/* exported function */
+void
+evdns_close_server_port(struct evdns_server_port *port)
+{
+ if (--port->refcnt == 0)
+ server_port_free(port);
+ port->closing = 1;
+}
+
+/* exported function */
+int
+evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
+{
+ struct server_request *req = TO_SERVER_REQUEST(_req);
+ struct server_reply_item **itemp, *item;
+ int *countp;
+
+ if (req->response) /* have we already answered? */
+ return (-1);
+
+ switch (section) {
+ case EVDNS_ANSWER_SECTION:
+ itemp = &req->answer;
+ countp = &req->n_answer;
+ break;
+ case EVDNS_AUTHORITY_SECTION:
+ itemp = &req->authority;
+ countp = &req->n_authority;
+ break;
+ case EVDNS_ADDITIONAL_SECTION:
+ itemp = &req->additional;
+ countp = &req->n_additional;
+ break;
+ default:
+ return (-1);
+ }
+ while (*itemp) {
+ itemp = &((*itemp)->next);
+ }
+ item = malloc(sizeof(struct server_reply_item));
+ if (!item)
+ return -1;
+ item->next = NULL;
+ if (!(item->name = strdup(name))) {
+ free(item);
+ return -1;
+ }
+ item->type = type;
+ item->dns_question_class = class;
+ item->ttl = ttl;
+ item->is_name = is_name != 0;
+ item->datalen = 0;
+ item->data = NULL;
+ if (data) {
+ if (item->is_name) {
+ if (!(item->data = strdup(data))) {
+ free(item->name);
+ free(item);
+ return -1;
+ }
+ item->datalen = (u16)-1;
+ } else {
+ if (!(item->data = malloc(datalen))) {
+ free(item->name);
+ free(item);
+ return -1;
+ }
+ item->datalen = datalen;
+ memcpy(item->data, data, datalen);
+ }
+ }
+
+ *itemp = item;
+ ++(*countp);
+ return 0;
+}
+
+/* exported function */
+int
+evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
+ ttl, n*4, 0, addrs);
+}
+
+/* exported function */
+int
+evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
+ ttl, n*16, 0, addrs);
+}
+
+/* exported function */
+int
+evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
+{
+ u32 a;
+ char buf[32];
+ assert(in || inaddr_name);
+ assert(!(in && inaddr_name));
+ if (in) {
+ a = ntohl(in->s_addr);
+ evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
+ (int)(u8)((a )&0xff),
+ (int)(u8)((a>>8 )&0xff),
+ (int)(u8)((a>>16)&0xff),
+ (int)(u8)((a>>24)&0xff));
+ inaddr_name = buf;
+ }
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
+ ttl, -1, 1, hostname);
+}
+
+/* exported function */
+int
+evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
+ ttl, -1, 1, cname);
+}
+
+
/* Render req's questions and reply sections into req->response as a
 * wire-format DNS message with response code err (0..15).  Consumes the
 * reply items on success.  Returns 0, or -1 on a bad err / encoding
 * failure / allocation failure. */
static int
evdns_server_request_format_response(struct server_request *req, int err)
{
	unsigned char buf[1500];
	size_t buf_len = sizeof(buf);
	off_t j = 0, r;
	u16 _t;		/* scratch used by APPEND16 (macro from dnsname_to_labels) */
	u32 _t32;	/* scratch used by APPEND32 */
	int i;
	u16 flags;
	struct dnslabel_table table;	/* name-compression state for this message */

	if (err < 0 || err > 15) return -1;

	/* Set response bit and error code; copy OPCODE and RD fields from
	 * question; copy RA and AA if set by caller. */
	flags = req->base.flags;
	flags |= (0x8000 | err);

	dnslabel_table_init(&table);
	/* 12-byte header: id, flags, then the four section counts */
	APPEND16(req->trans_id);
	APPEND16(flags);
	APPEND16(req->base.nquestions);
	APPEND16(req->n_answer);
	APPEND16(req->n_authority);
	APPEND16(req->n_additional);

	/* Add questions. */
	for (i=0; i < req->base.nquestions; ++i) {
		const char *s = req->base.questions[i]->name;
		j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
		if (j < 0) {
			dnslabel_clear(&table);
			return (int) j;
		}
		APPEND16(req->base.questions[i]->type);
		APPEND16(req->base.questions[i]->dns_question_class);
	}

	/* Add answer, authority, and additional sections. */
	for (i=0; i<3; ++i) {
		struct server_reply_item *item;
		if (i==0)
			item = req->answer;
		else if (i==1)
			item = req->authority;
		else
			item = req->additional;
		while (item) {
			r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
			if (r < 0)
				goto overflow;
			j = r;

			APPEND16(item->type);
			APPEND16(item->dns_question_class);
			APPEND32(item->ttl);
			if (item->is_name) {
				/* RDATA is a hostname: reserve two bytes for
				 * the RDLENGTH, encode the name, backpatch. */
				off_t len_idx = j, name_start;
				j += 2;
				name_start = j;
				r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
				if (r < 0)
					goto overflow;
				j = r;
				_t = htons( (short) (j-name_start) );
				memcpy(buf+len_idx, &_t, 2);
			} else {
				APPEND16(item->datalen);
				if (j+item->datalen > (off_t)buf_len)
					goto overflow;
				memcpy(buf+j, item->data, item->datalen);
				j += item->datalen;
			}
			item = item->next;
		}
	}

	/* classic UDP DNS caps a message at 512 bytes: truncate + set TC bit */
	if (j > 512) {
overflow:
		j = 512;
		buf[2] |= 0x02; /* set the truncated bit. */
	}

	req->response_len = j;

	if (!(req->response = malloc(req->response_len))) {
		server_request_free_answers(req);
		dnslabel_clear(&table);
		return (-1);
	}
	memcpy(req->response, buf, req->response_len);
	server_request_free_answers(req);
	dnslabel_clear(&table);
	return (0);
}
+
+/* exported function */
/* Send (or queue) the response for _req with response code err.
 * Returns 0 when the reply was sent and the request freed, 1 when it
 * was queued pending socket writability, negative on error. */
int
evdns_server_request_respond(struct evdns_server_request *_req, int err)
{
	struct server_request *req = TO_SERVER_REQUEST(_req);
	struct evdns_server_port *port = req->port;
	int r;
	/* lazily render the wire-format reply the first time through */
	if (!req->response) {
		if ((r = evdns_server_request_format_response(req, err))<0)
			return r;
	}

	r = sendto(port->socket, req->response, req->response_len, 0,
			   (struct sockaddr*) &req->addr, req->addrlen);
	if (r<0) {
		int sock_err = last_error(port->socket);
		if (! error_is_eagain(sock_err))
			return -1;

		/* kernel buffer full: park the request on the port's circular
		 * pending-replies list and wait for an EV_WRITE event */
		if (port->pending_replies) {
			req->prev_pending = port->pending_replies->prev_pending;
			req->next_pending = port->pending_replies;
			req->prev_pending->next_pending =
				req->next_pending->prev_pending = req;
		} else {
			req->prev_pending = req->next_pending = req;
			port->pending_replies = req;
			port->choked = 1;

			/* start (also) listening for writability on the port */
			(void) event_del(&port->event);
			event_set(&port->event, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);

			if (event_add(&port->event, NULL) < 0) {
				log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
			}

		}

		return 1;
	}
	/* sent: free the request; stop if that released the port too */
	if (server_request_free(req))
		return 0;

	if (port->pending_replies)
		server_port_flush(port);

	return 0;
}
+
+/* Free all storage held by RRs in req. */
+static void
+server_request_free_answers(struct server_request *req)
+{
+ struct server_reply_item *victim, *next, **list;
+ int i;
+ for (i = 0; i < 3; ++i) {
+ if (i==0)
+ list = &req->answer;
+ else if (i==1)
+ list = &req->authority;
+ else
+ list = &req->additional;
+
+ victim = *list;
+ while (victim) {
+ next = victim->next;
+ free(victim->name);
+ if (victim->data)
+ free(victim->data);
+ free(victim);
+ victim = next;
+ }
+ *list = NULL;
+ }
+}
+
+/* Free all storage held by req, and remove links to it. */
+/* return true iff we just wound up freeing the server_port. */
+static int
+server_request_free(struct server_request *req)
+{
+ int i, rc=1;
+ if (req->base.questions) {
+ for (i = 0; i < req->base.nquestions; ++i)
+ free(req->base.questions[i]);
+ free(req->base.questions);
+ }
+
+ if (req->port) {
+ if (req->port->pending_replies == req) {
+ if (req->next_pending)
+ req->port->pending_replies = req->next_pending;
+ else
+ req->port->pending_replies = NULL;
+ }
+ rc = --req->port->refcnt;
+ }
+
+ if (req->response) {
+ free(req->response);
+ }
+
+ server_request_free_answers(req);
+
+ if (req->next_pending && req->next_pending != req) {
+ req->next_pending->prev_pending = req->prev_pending;
+ req->prev_pending->next_pending = req->next_pending;
+ }
+
+ if (rc == 0) {
+ server_port_free(req->port);
+ free(req);
+ return (1);
+ }
+ free(req);
+ return (0);
+}
+
+/* Free all storage held by an evdns_server_port. Only called when */
+static void
+server_port_free(struct evdns_server_port *port)
+{
+ assert(port);
+ assert(!port->refcnt);
+ assert(!port->pending_replies);
+ if (port->socket > 0) {
+ CLOSE_SOCKET(port->socket);
+ port->socket = -1;
+ }
+ (void) event_del(&port->event);
+ /* XXXX actually free the port? -NM */
+}
+
+/* exported function */
/* Discard _req without sending any reply; always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	server_request_free(TO_SERVER_REQUEST(_req));
	return 0;
}
+
+/* exported function */
+int
+evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
+{
+ struct server_request *req = TO_SERVER_REQUEST(_req);
+ if (addr_len < (int)req->addrlen)
+ return -1;
+ memcpy(sa, &(req->addr), req->addrlen);
+ return req->addrlen;
+}
+
+#undef APPEND16
+#undef APPEND32
+
/* this is a libevent callback function which is called when a request */
/* has timed out. */
static void
evdns_request_timeout_callback(int fd, short events, void *arg) {
	struct request *const req = (struct request *) arg;
	(void) fd;
	(void) events;

	log(EVDNS_LOG_DEBUG, "Request %lx timed out", (unsigned long) arg);

	/* penalise the nameserver; after too many consecutive timeouts it
	 * is declared failed and probing begins */
	req->ns->timedout++;
	if (req->ns->timedout > global_max_nameserver_timeout) {
		req->ns->timedout = 0;
		nameserver_failed(req->ns, "request timed out.");
	}

	(void) evtimer_del(&req->timeout_event);
	if (req->tx_count >= global_max_retransmits) {
		/* this request has failed */
		reply_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
		request_finished(req, &req_head);
	} else {
		/* retransmit it */
		evdns_request_transmit(req);
	}
}
+
/* try to send a request to a given server. */
/* */
/* return: */
/*	 0 ok */
/*	 1 temporary failure */
/*	 2 other failure */
static int
evdns_request_transmit_to(struct request *req, struct nameserver *server) {
	struct sockaddr_in sin;
	int r;
	memset(&sin, 0, sizeof(sin));
	/* NOTE(review): the destination address is taken from req->ns
	 * while the socket and EAGAIN handling use `server`; callers pass
	 * server == req->ns, but confirm before calling with a different
	 * pair. */
	sin.sin_addr.s_addr = req->ns->address;
	sin.sin_port = req->ns->port;
	sin.sin_family = AF_INET;

	r = sendto(server->socket, req->request, req->request_len, 0,
	    (struct sockaddr*)&sin, sizeof(sin));
	if (r < 0) {
		int err = last_error(server->socket);
		if (error_is_eagain(err)) return 1;
		nameserver_failed(req->ns, strerror(err));
		return 2;
	} else if (r != (int)req->request_len) {
		return 1;  /* short write */
	} else {
		return 0;
	}
}
+
/* try to send a request, updating the fields of the request */
/* as needed */
/* */
/* return: */
/*	 0 ok */
/*	 1 failed */
static int
evdns_request_transmit(struct request *req) {
	int retcode = 0, r;

	/* if we fail to send this packet then this flag marks it */
	/* for evdns_transmit */
	req->transmit_me = 1;
	/* 0xffff is the reserved "no transaction id" sentinel and must
	 * never reach the wire */
	if (req->trans_id == 0xffff) abort();

	if (req->ns->choked) {
		/* don't bother trying to write to a socket */
		/* which we have had EAGAIN from */
		return 1;
	}

	r = evdns_request_transmit_to(req, req->ns);
	switch (r) {
	case 1:
		/* temp failure */
		req->ns->choked = 1;
		nameserver_write_waiting(req->ns, 1);
		return 1;
	case 2:
		/* failed in some other way */
		retcode = 1;
		/* fall through: the timeout is still scheduled below so the
		 * request is retried or expired through the normal path */
	default:
		/* all ok */
		log(EVDNS_LOG_DEBUG,
			"Setting timeout for request %lx", (unsigned long) req);
		if (evtimer_add(&req->timeout_event, &global_timeout) < 0) {
			log(EVDNS_LOG_WARN,
				"Error from libevent when adding timer for request %lx",
				(unsigned long) req);
			/* ???? Do more? */
		}
		req->tx_count++;
		req->transmit_me = 0;
		return retcode;
	}
}
+
+static void
+nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
+ struct nameserver *const ns = (struct nameserver *) arg;
+ (void) type;
+ (void) count;
+ (void) ttl;
+ (void) addresses;
+
+ if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
+ /* this is a good reply */
+ nameserver_up(ns);
+ } else nameserver_probe_failed(ns);
+}
+
+static void
+nameserver_send_probe(struct nameserver *const ns) {
+ struct request *req;
+ /* here we need to send a probe to a given nameserver */
+ /* in the hope that it is up now. */
+
+ log(EVDNS_LOG_DEBUG, "Sending probe to %s", debug_ntoa(ns->address));
+
+ req = request_new(TYPE_A, "www.google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
+ if (!req) return;
+ /* we force this into the inflight queue no matter what */
+ request_trans_id_set(req, transaction_id_pick());
+ req->ns = ns;
+ request_submit(req);
+}
+
+/* returns: */
+/* 0 didn't try to transmit anything */
+/* 1 tried to transmit something */
+static int
+evdns_transmit(void) {
+ char did_try_to_transmit = 0;
+
+ if (req_head) {
+ struct request *const started_at = req_head, *req = req_head;
+ /* first transmit all the requests which are currently waiting */
+ do {
+ if (req->transmit_me) {
+ did_try_to_transmit = 1;
+ evdns_request_transmit(req);
+ }
+
+ req = req->next;
+ } while (req != started_at);
+ }
+
+ return did_try_to_transmit;
+}
+
+/* exported function */
+int
+evdns_count_nameservers(void)
+{
+ const struct nameserver *server = server_head;
+ int n = 0;
+ if (!server)
+ return 0;
+ do {
+ ++n;
+ server = server->next;
+ } while (server != server_head);
+ return n;
+}
+
/* exported function */
/* Tear down every configured nameserver (closing its socket and
 * events) and move all inflight requests back to the front of the
 * waiting queue with their counters reset.  Processing resumes after
 * nameservers are re-added and evdns_resume() is called.  Returns 0. */
int
evdns_clear_nameservers_and_suspend(void)
{
	struct nameserver *server = server_head, *started_at = server_head;
	struct request *req = req_head, *req_started_at = req_head;

	if (!server)
		return 0;
	/* free every entry of the circular nameserver list */
	while (1) {
		struct nameserver *next = server->next;
		(void) event_del(&server->event);
		if (evtimer_initialized(&server->timeout_event))
			(void) evtimer_del(&server->timeout_event);
		if (server->socket >= 0)
			CLOSE_SOCKET(server->socket);
		free(server);
		if (next == started_at)
			break;
		server = next;
	}
	server_head = NULL;
	global_good_nameservers = 0;

	/* demote every inflight request back to "waiting" */
	while (req) {
		struct request *next = req->next;
		req->tx_count = req->reissue_count = 0;
		req->ns = NULL;
		/* ???? What to do about searches? */
		(void) evtimer_del(&req->timeout_event);
		req->trans_id = 0;
		req->transmit_me = 0;

		global_requests_waiting++;
		evdns_request_insert(req, &req_waiting_head);
		/* We want to insert these suspended elements at the front of
		 * the waiting queue, since they were pending before any of
		 * the waiting entries were added. This is a circular list,
		 * so we can just shift the start back by one.*/
		req_waiting_head = req_waiting_head->prev;

		if (next == req_started_at)
			break;
		req = next;
	}
	req_head = NULL;
	global_requests_inflight = 0;

	return 0;
}
+
+
+/* exported function */
/* Resume request processing after evdns_clear_nameservers_and_suspend():
 * pushes queued requests back into flight (up to the inflight limit).
 * Always returns 0. */
int
evdns_resume(void)
{
	evdns_requests_pump_waiting_queue();
	return 0;
}
+
+static int
+_evdns_nameserver_add_impl(unsigned long int address, int port) {
+ /* first check to see if we already have this nameserver */
+
+ const struct nameserver *server = server_head, *const started_at = server_head;
+ struct nameserver *ns;
+ int err = 0;
+ if (server) {
+ do {
+ if (server->address == address) return 3;
+ server = server->next;
+ } while (server != started_at);
+ }
+
+ ns = (struct nameserver *) malloc(sizeof(struct nameserver));
+ if (!ns) return -1;
+
+ memset(ns, 0, sizeof(struct nameserver));
+
+ evtimer_set(&ns->timeout_event, nameserver_prod_callback, ns);
+
+ ns->socket = socket(PF_INET, SOCK_DGRAM, 0);
+ if (ns->socket < 0) { err = 1; goto out1; }
+ evutil_make_socket_nonblocking(ns->socket);
+
+ ns->address = address;
+ ns->port = htons(port);
+ ns->state = 1;
+ event_set(&ns->event, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
+ if (event_add(&ns->event, NULL) < 0) {
+ err = 2;
+ goto out2;
+ }
+
+ log(EVDNS_LOG_DEBUG, "Added nameserver %s", debug_ntoa(address));
+
+ /* insert this nameserver into the list of them */
+ if (!server_head) {
+ ns->next = ns->prev = ns;
+ server_head = ns;
+ } else {
+ ns->next = server_head->next;
+ ns->prev = server_head;
+ server_head->next = ns;
+ if (server_head->prev == server_head) {
+ server_head->prev = ns;
+ }
+ }
+
+ global_good_nameservers++;
+
+ return 0;
+
+out2:
+ CLOSE_SOCKET(ns->socket);
+out1:
+ free(ns);
+ log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d", debug_ntoa(address), err);
+ return err;
+}
+
+/* exported function */
/* Add a nameserver given its IPv4 address in network byte order, using
 * the standard DNS port (53). */
int
evdns_nameserver_add(unsigned long int address) {
	return _evdns_nameserver_add_impl(address, 53);
}
+
+/* exported function */
/* Add a nameserver given as "a.b.c.d" or "a.b.c.d:port".  Returns 4 on
 * a malformed address or out-of-range port; otherwise whatever
 * _evdns_nameserver_add_impl() returns. */
int
evdns_nameserver_ip_add(const char *ip_as_string) {
	struct in_addr ina;
	int port;
	char addr_part[20];
	const char *addr = ip_as_string;
	const char *colon = strchr(ip_as_string, ':');

	if (!colon) {
		port = 53;
	} else {
		/* split "addr:port" into its two pieces */
		const size_t addr_len = colon - ip_as_string;
		port = strtoint(colon + 1);
		if (port < 0 || port > 65535)
			return 4;
		if (addr_len >= sizeof(addr_part))
			return 4;
		memcpy(addr_part, ip_as_string, addr_len);
		addr_part[addr_len] = '\0';
		addr = addr_part;
	}
	if (!inet_aton(addr, &ina))
		return 4;
	return _evdns_nameserver_add_impl(ina.s_addr, port);
}
+
+/* insert into the tail of the queue */
+static void
+evdns_request_insert(struct request *req, struct request **head) {
+ if (!*head) {
+ *head = req;
+ req->next = req->prev = req;
+ return;
+ }
+
+ req->prev = (*head)->prev;
+ req->prev->next = req;
+ req->next = *head;
+ (*head)->prev = req;
+}
+
/* Count the '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	int n = 0;
	for (; *s; ++s) {
		if (*s == '.')
			++n;
	}
	return n;
}
+
/* Allocate and initialise a DNS request for (type, name); the wire
 * packet is built in the same allocation, immediately after the struct.
 * A transaction id and a nameserver are assigned only when we are below
 * the inflight limit (otherwise trans_id stays 0xffff and ns NULL until
 * the request leaves the waiting queue).  Returns NULL on allocation or
 * encoding failure.  The request is NOT queued; see request_submit(). */
static struct request *
request_new(int type, const char *name, int flags,
    evdns_callback_type callback, void *user_ptr) {
	const char issuing_now =
	    (global_requests_inflight < global_max_requests_inflight) ? 1 : 0;

	const int name_len = strlen(name);
	const int request_max_len = evdns_request_len(name_len);
	const u16 trans_id = issuing_now ? transaction_id_pick() : 0xffff;
	/* the request data is alloced in a single block with the header */
	struct request *const req =
	    (struct request *) malloc(sizeof(struct request) + request_max_len);
	int rlen;
	(void) flags;

	if (!req) return NULL;
	memset(req, 0, sizeof(struct request));

	evtimer_set(&req->timeout_event, evdns_request_timeout_callback, req);

	/* request data lives just after the header */
	req->request = ((u8 *) req) + sizeof(struct request);
	/* denotes that the request data shouldn't be free()ed */
	req->request_appended = 1;
	rlen = evdns_request_data_build(name, name_len, trans_id,
	    type, CLASS_INET, req->request, request_max_len);
	if (rlen < 0)
		goto err1;
	req->request_len = rlen;
	req->trans_id = trans_id;
	req->tx_count = 0;
	req->request_type = type;
	req->user_pointer = user_ptr;
	req->user_callback = callback;
	req->ns = issuing_now ? nameserver_pick() : NULL;
	req->next = req->prev = NULL;

	return req;
err1:
	free(req);
	return NULL;
}
+
+static void
+request_submit(struct request *const req) {
+ if (req->ns) {
+ /* if it has a nameserver assigned then this is going */
+ /* straight into the inflight queue */
+ evdns_request_insert(req, &req_head);
+ global_requests_inflight++;
+ evdns_request_transmit(req);
+ } else {
+ evdns_request_insert(req, &req_waiting_head);
+ global_requests_waiting++;
+ }
+}
+
+/* exported function */
+int evdns_resolve_ipv4(const char *name, int flags,
+ evdns_callback_type callback, void *ptr) {
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
+ if (flags & DNS_QUERY_NO_SEARCH) {
+ struct request *const req =
+ request_new(TYPE_A, name, flags, callback, ptr);
+ if (req == NULL)
+ return (1);
+ request_submit(req);
+ return (0);
+ } else {
+ return (search_request_new(TYPE_A, name, flags, callback, ptr));
+ }
+}
+
+/* exported function */
+int evdns_resolve_ipv6(const char *name, int flags,
+ evdns_callback_type callback, void *ptr) {
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
+ if (flags & DNS_QUERY_NO_SEARCH) {
+ struct request *const req =
+ request_new(TYPE_AAAA, name, flags, callback, ptr);
+ if (req == NULL)
+ return (1);
+ request_submit(req);
+ return (0);
+ } else {
+ return (search_request_new(TYPE_AAAA, name, flags, callback, ptr));
+ }
+}
+
+int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
+ char buf[32];
+ struct request *req;
+ u32 a;
+ assert(in);
+ a = ntohl(in->s_addr);
+ evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
+ (int)(u8)((a )&0xff),
+ (int)(u8)((a>>8 )&0xff),
+ (int)(u8)((a>>16)&0xff),
+ (int)(u8)((a>>24)&0xff));
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
+ req = request_new(TYPE_PTR, buf, flags, callback, ptr);
+ if (!req) return 1;
+ request_submit(req);
+ return 0;
+}
+
+int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
+ /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
+ char buf[73];
+ char *cp;
+ struct request *req;
+ int i;
+ assert(in);
+ cp = buf;
+ for (i=15; i >= 0; --i) {
+ u8 byte = in->s6_addr[i];
+ *cp++ = "0123456789abcdef"[byte & 0x0f];
+ *cp++ = '.';
+ *cp++ = "0123456789abcdef"[byte >> 4];
+ *cp++ = '.';
+ }
+ assert(cp + strlen("ip6.arpa") < buf+sizeof(buf));
+ memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
+ req = request_new(TYPE_PTR, buf, flags, callback, ptr);
+ if (!req) return 1;
+ request_submit(req);
+ return 0;
+}
+
+/*/////////////////////////////////////////////////////////////////// */
+/* Search support */
+/* */
+/* the libc resolver has support for searching a number of domains */
+/* to find a name. If nothing else then it takes the single domain */
+/* from the gethostname() call. */
+/* */
+/* It can also be configured via the domain and search options in a */
+/* resolv.conf. */
+/* */
+/* The ndots option controls how many dots it takes for the resolver */
+/* to decide that a name is non-local and so try a raw lookup first. */
+
/* One search-domain suffix. The domain text (NOT NUL-terminated) is
 * stored immediately after this header in the same allocation. */
struct search_domain {
	int len;                    /* length of the appended text */
	struct search_domain *next; /* singly-linked list */
	/* the text string is appended to this structure */
};
+
/* Reference-counted search configuration (domain list + ndots).
 * Shared between the global pointer and each in-flight search request. */
struct search_state {
	int refcount;    /* owners: global_search_state plus each searching request */
	int ndots;       /* names with at least this many dots are tried raw first */
	int num_domains; /* number of entries in the list at `head' */
	struct search_domain *head;
};
+
/* Current search configuration; NULL until first configured. */
static struct search_state *global_search_state = NULL;
+
+static void
+search_state_decref(struct search_state *const state) {
+ if (!state) return;
+ state->refcount--;
+ if (!state->refcount) {
+ struct search_domain *next, *dom;
+ for (dom = state->head; dom; dom = next) {
+ next = dom->next;
+ free(dom);
+ }
+ free(state);
+ }
+}
+
+static struct search_state *
+search_state_new(void) {
+ struct search_state *state = (struct search_state *) malloc(sizeof(struct search_state));
+ if (!state) return NULL;
+ memset(state, 0, sizeof(struct search_state));
+ state->refcount = 1;
+ state->ndots = 1;
+
+ return state;
+}
+
/* Throw away the current search-domain list and start a fresh, empty
 * state. */
static void
search_postfix_clear(void) {
	search_state_decref(global_search_state);

	/* NOTE(review): search_state_new() can return NULL on OOM, leaving
	 * global_search_state NULL; callers must tolerate that. */
	global_search_state = search_state_new();
}
+
/* exported function */
/* Remove all configured search domains. */
void
evdns_search_clear(void) {
	search_postfix_clear();
}
+
+static void
+search_postfix_add(const char *domain) {
+ int domain_len;
+ struct search_domain *sdomain;
+ while (domain[0] == '.') domain++;
+ domain_len = strlen(domain);
+
+ if (!global_search_state) global_search_state = search_state_new();
+ if (!global_search_state) return;
+ global_search_state->num_domains++;
+
+ sdomain = (struct search_domain *) malloc(sizeof(struct search_domain) + domain_len);
+ if (!sdomain) return;
+ memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
+ sdomain->next = global_search_state->head;
+ sdomain->len = domain_len;
+
+ global_search_state->head = sdomain;
+}
+
+/* reverse the order of members in the postfix list. This is needed because, */
+/* when parsing resolv.conf we push elements in the wrong order */
+static void
+search_reverse(void) {
+ struct search_domain *cur, *prev = NULL, *next;
+ cur = global_search_state->head;
+ while (cur) {
+ next = cur->next;
+ cur->next = prev;
+ prev = cur;
+ cur = next;
+ }
+
+ global_search_state->head = prev;
+}
+
/* exported function */
/* Append a domain to the search list (prepended internally; order is
 * fixed up by search_reverse() when parsing resolv.conf). */
void
evdns_search_add(const char *domain) {
	search_postfix_add(domain);
}
+
/* exported function */
/* Set the ndots threshold (see resolv.conf(5)); creates the search
 * state on demand and silently does nothing on allocation failure. */
void
evdns_search_ndots_set(const int ndots) {
	if (!global_search_state) global_search_state = search_state_new();
	if (!global_search_state) return;
	global_search_state->ndots = ndots;
}
+
+static void
+search_set_from_hostname(void) {
+ char hostname[HOST_NAME_MAX + 1], *domainname;
+
+ search_postfix_clear();
+ if (gethostname(hostname, sizeof(hostname))) return;
+ domainname = strchr(hostname, '.');
+ if (!domainname) return;
+ search_postfix_add(domainname);
+}
+
+/* warning: returns malloced string */
+static char *
+search_make_new(const struct search_state *const state, int n, const char *const base_name) {
+ const int base_len = strlen(base_name);
+ const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
+ struct search_domain *dom;
+
+ for (dom = state->head; dom; dom = dom->next) {
+ if (!n--) {
+ /* this is the postfix we want */
+ /* the actual postfix string is kept at the end of the structure */
+ const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
+ const int postfix_len = dom->len;
+ char *const newname = (char *) malloc(base_len + need_to_append_dot + postfix_len + 1);
+ if (!newname) return NULL;
+ memcpy(newname, base_name, base_len);
+ if (need_to_append_dot) newname[base_len] = '.';
+ memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
+ newname[base_len + need_to_append_dot + postfix_len] = 0;
+ return newname;
+ }
+ }
+
+ /* we ran off the end of the list and still didn't find the requested string */
+ abort();
+ return NULL; /* unreachable; stops warnings in some compilers. */
+}
+
/* Create and submit a request for `name', applying the search-domain
 * policy: names with >= ndots dots are tried raw first (search_index
 * -1), others start with the first search postfix (search_index 0).
 * Returns 0 on success, 1 on allocation failure. */
static int
search_request_new(int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg) {
	assert(type == TYPE_A || type == TYPE_AAAA);
	if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
	     global_search_state &&
		 global_search_state->num_domains) {
		/* we have some domains to search */
		struct request *req;
		if (string_num_dots(name) >= global_search_state->ndots) {
			req = request_new(type, name, flags, user_callback, user_arg);
			if (!req) return 1;
			/* -1 marks "raw name already tried"; search_try_next
			 * then walks the postfix list from index 0 */
			req->search_index = -1;
		} else {
			char *const new_name = search_make_new(global_search_state, 0, name);
			if (!new_name) return 1;
			req = request_new(type, new_name, flags, user_callback, user_arg);
			free(new_name);
			if (!req) return 1;
			req->search_index = 0;
		}
		/* NOTE(review): strdup result is unchecked; on OOM
		 * search_origname is NULL and search_try_next would pass NULL
		 * to string_num_dots — confirm and harden upstream. */
		req->search_origname = strdup(name);
		req->search_state = global_search_state;
		req->search_flags = flags;
		global_search_state->refcount++;
		request_submit(req);
		return 0;
	} else {
		/* no searching: a single direct query */
		struct request *const req = request_new(type, name, flags, user_callback, user_arg);
		if (!req) return 1;
		request_submit(req);
		return 0;
	}
}
+
/* this is called when a request has failed to find a name. We need to check */
/* if it is part of a search and, if so, try the next name in the list */
/* returns: */
/* 0 another request has been submitted */
/* 1 no more requests needed */
static int
search_try_next(struct request *const req) {
	if (req->search_state) {
		/* it is part of a search */
		char *new_name;
		struct request *newreq;
		req->search_index++;
		if (req->search_index >= req->search_state->num_domains) {
			/* no more postfixes to try, however we may need to try */
			/* this name without a postfix */
			if (string_num_dots(req->search_origname) < req->search_state->ndots) {
				/* yep, we need to try it raw */
				newreq = request_new(req->request_type, req->search_origname, req->search_flags, req->user_callback, req->user_pointer);
				log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", req->search_origname);
				if (newreq) {
					request_submit(newreq);
					return 0;
				}
			}
			return 1;
		}

		new_name = search_make_new(req->search_state, req->search_index, req->search_origname);
		if (!new_name) return 1;
		log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, req->search_index);
		newreq = request_new(req->request_type, new_name, req->search_flags, req->user_callback, req->user_pointer);
		free(new_name);
		if (!newreq) return 1;
		/* ownership of search_origname moves to the new request; the
		 * old request must not free it in search_request_finished */
		newreq->search_origname = req->search_origname;
		req->search_origname = NULL;
		newreq->search_state = req->search_state;
		newreq->search_flags = req->search_flags;
		newreq->search_index = req->search_index;
		/* the new request holds its own reference to the search state */
		newreq->search_state->refcount++;
		request_submit(newreq);
		return 0;
	}
	return 1;
}
+
+static void
+search_request_finished(struct request *const req) {
+ if (req->search_state) {
+ search_state_decref(req->search_state);
+ req->search_state = NULL;
+ }
+ if (req->search_origname) {
+ free(req->search_origname);
+ req->search_origname = NULL;
+ }
+}
+
+/*/////////////////////////////////////////////////////////////////// */
+/* Parsing resolv.conf files */
+
/* Install fallback configuration for when resolv.conf is missing or
 * empty: a search list derived from the local hostname, and 127.0.0.1
 * as the only nameserver. */
static void
evdns_resolv_set_defaults(int flags) {
	/* if the file isn't found then we assume a local resolver */
	if (flags & DNS_OPTION_SEARCH) search_set_from_hostname();
	if (flags & DNS_OPTION_NAMESERVERS) evdns_nameserver_ip_add("127.0.0.1");
}
+
#ifndef HAVE_STRTOK_R
/* Minimal fallback that delegates to strtok(): it ignores `state' and
 * is therefore NOT reentrant. Adequate here because resolv.conf parsing
 * runs one tokenization at a time on a single thread. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	return strtok(s, delim);
}
#endif
+
/* helper version of atoi which returns -1 on error */
/* Note: a parsed value of -1 is indistinguishable from an error; the
 * callers here only use non-negative option values, so that is fine. */
static int
strtoint(const char *const str) {
	char *endptr;
	long r;
	errno = 0;
	r = strtol(str, &endptr, 10);
	/* reject empty input (the old code returned 0 for "") and any
	 * trailing junk */
	if (endptr == str || *endptr) return -1;
	/* reject values that overflow long or do not fit in int; the old
	 * code silently truncated long to int */
	if (errno == ERANGE || r > INT_MAX || r < INT_MIN) return -1;
	return (int)r;
}
+
/* helper version of atoi that returns -1 on error and clips to bounds. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int v = strtoint(str);
	if (v == -1) return -1;
	if (v < min) return min;
	if (v > max) return max;
	return v;
}
+
+/* exported function */
+int
+evdns_set_option(const char *option, const char *val, int flags)
+{
+ if (!strncmp(option, "ndots:", 6)) {
+ const int ndots = strtoint(val);
+ if (ndots == -1) return -1;
+ if (!(flags & DNS_OPTION_SEARCH)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
+ if (!global_search_state) global_search_state = search_state_new();
+ if (!global_search_state) return -1;
+ global_search_state->ndots = ndots;
+ } else if (!strncmp(option, "timeout:", 8)) {
+ const int timeout = strtoint(val);
+ if (timeout == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting timeout to %d", timeout);
+ global_timeout.tv_sec = timeout;
+ } else if (!strncmp(option, "max-timeouts:", 12)) {
+ const int maxtimeout = strtoint_clipped(val, 1, 255);
+ if (maxtimeout == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
+ maxtimeout);
+ global_max_nameserver_timeout = maxtimeout;
+ } else if (!strncmp(option, "max-inflight:", 13)) {
+ const int maxinflight = strtoint_clipped(val, 1, 65000);
+ if (maxinflight == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
+ maxinflight);
+ global_max_requests_inflight = maxinflight;
+ } else if (!strncmp(option, "attempts:", 9)) {
+ int retries = strtoint(val);
+ if (retries == -1) return -1;
+ if (retries > 255) retries = 255;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
+ global_max_retransmits = retries;
+ }
+ return 0;
+}
+
+static void
+resolv_conf_parse_line(char *const start, int flags) {
+ char *strtok_state;
+ static const char *const delims = " \t";
+#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
+
+ char *const first_token = strtok_r(start, delims, &strtok_state);
+ if (!first_token) return;
+
+ if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
+ const char *const nameserver = NEXT_TOKEN;
+ struct in_addr ina;
+
+ if (inet_aton(nameserver, &ina)) {
+ /* address is valid */
+ evdns_nameserver_add(ina.s_addr);
+ }
+ } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
+ const char *const domain = NEXT_TOKEN;
+ if (domain) {
+ search_postfix_clear();
+ search_postfix_add(domain);
+ }
+ } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
+ const char *domain;
+ search_postfix_clear();
+
+ while ((domain = NEXT_TOKEN)) {
+ search_postfix_add(domain);
+ }
+ search_reverse();
+ } else if (!strcmp(first_token, "options")) {
+ const char *option;
+ while ((option = NEXT_TOKEN)) {
+ const char *val = strchr(option, ':');
+ evdns_set_option(option, val ? val+1 : "", flags);
+ }
+ }
+#undef NEXT_TOKEN
+}
+
/* exported function */
/* returns: */
/* 0 no errors */
/* 1 failed to open file */
/* 2 failed to stat file */
/* 3 file too large */
/* 4 out of memory */
/* 5 short read from file */
/* 6 no nameservers configured (127.0.0.1 installed as fallback) */
int
evdns_resolv_conf_parse(int flags, const char *const filename) {
	struct stat st;
	int fd, n, r;
	u8 *resolv;
	char *start;
	int err = 0;

	log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		/* missing file: fall back to hostname-derived search list
		 * and the local resolver */
		evdns_resolv_set_defaults(flags);
		return 1;
	}

	if (fstat(fd, &st)) { err = 2; goto out1; }
	if (!st.st_size) {
		evdns_resolv_set_defaults(flags);
		err = (flags & DNS_OPTION_NAMESERVERS) ? 6 : 0;
		goto out1;
	}
	if (st.st_size > 65535) { err = 3; goto out1; }	/* no resolv.conf should be any bigger */

	resolv = (u8 *) malloc((size_t)st.st_size + 1);
	if (!resolv) { err = 4; goto out1; }

	n = 0;
	while ((r = read(fd, resolv+n, (size_t)st.st_size-n)) > 0) {
		n += r;
		if (n == st.st_size)
			break;
		assert(n < st.st_size);
	}
	/* NOTE(review): if the file shrinks mid-read, read() returns 0 and
	 * the partial buffer is parsed without reporting error 5 — confirm
	 * this is acceptable. */
	if (r < 0) { err = 5; goto out2; }
	resolv[n] = 0;	 /* we malloced an extra byte; this should be fine. */

	/* parse line by line, NUL-terminating each line in place */
	start = (char *) resolv;
	for (;;) {
		char *const newline = strchr(start, '\n');
		if (!newline) {
			resolv_conf_parse_line(start, flags);
			break;
		} else {
			*newline = 0;
			resolv_conf_parse_line(start, flags);
			start = newline + 1;
		}
	}

	if (!server_head && (flags & DNS_OPTION_NAMESERVERS)) {
		/* no nameservers were configured. */
		evdns_nameserver_ip_add("127.0.0.1");
		err = 6;
	}
	if (flags & DNS_OPTION_SEARCH && (!global_search_state || global_search_state->num_domains == 0)) {
		search_set_from_hostname();
	}

out2:
	free(resolv);
out1:
	close(fd);
	return err;
}
+
+#ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Returns 0 on success, 4 on allocation failure, or the first error
 * reported by evdns_nameserver_ip_add(). */
static int
evdns_nameserver_ip_add_line(const char *ips) {
	const char *addr;
	char *buf;
	int r;
	while (*ips) {
		while (ISSPACE(*ips) || *ips == ',' || *ips == '\t')
			++ips;
		addr = ips;
		while (ISDIGIT(*ips) || *ips == '.' || *ips == ':')
			++ips;
		/* no progress: end of string, or a character that can never
		 * begin an address. The old code built an empty token here,
		 * which could loop forever if adding "" ever succeeded. */
		if (ips == addr)
			break;
		buf = malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_nameserver_ip_add(buf);
		free(buf);
		if (r) return r;
	}
	return 0;
}
+
+typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
+
/* Use the windows GetNetworkParams interface in iphlpapi.dll to */
/* figure out what our nameservers are. */
/* Returns 0 on success, 4 on allocation failure, -1 otherwise. */
static int
load_nameservers_with_getnetworkparams(void)
{
	/* Based on MSDN examples and inspection of c-ares code. */
	FIXED_INFO *fixed;
	HMODULE handle = 0;
	ULONG size = sizeof(FIXED_INFO);
	void *buf = NULL;
	int status = 0, r, added_any;
	IP_ADDR_STRING *ns;
	GetNetworkParams_fn_t fn;

	/* resolve GetNetworkParams dynamically so we don't hard-link
	 * against iphlpapi.dll */
	if (!(handle = LoadLibrary("iphlpapi.dll"))) {
		log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
		status = -1;
		goto done;
	}
	if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
		log(EVDNS_LOG_WARN, "Could not get address of function.");
		status = -1;
		goto done;
	}

	buf = malloc(size);
	if (!buf) { status = 4; goto done; }
	fixed = buf;
	r = fn(fixed, &size);
	if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
		status = -1;
		goto done;
	}
	if (r != ERROR_SUCCESS) {
		/* first buffer was too small; `size' now holds the required
		 * length, so retry exactly once with a bigger buffer */
		free(buf);
		buf = malloc(size);
		if (!buf) { status = 4; goto done; }
		fixed = buf;
		r = fn(fixed, &size);
		if (r != ERROR_SUCCESS) {
			log(EVDNS_LOG_DEBUG, "fn() failed.");
			status = -1;
			goto done;
		}
	}

	/* walk the linked list of DNS server address strings */
	assert(fixed);
	added_any = 0;
	ns = &(fixed->DnsServerList);
	while (ns) {
		r = evdns_nameserver_ip_add_line(ns->IpAddress.String);
		if (r) {
			log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
				(ns->IpAddress.String),(int)GetLastError());
			status = r;
			goto done;
		} else {
			log(EVDNS_LOG_DEBUG,"Succesfully added %s as nameserver",ns->IpAddress.String);
		}

		added_any++;
		ns = ns->Next;
	}

	if (!added_any) {
		log(EVDNS_LOG_DEBUG, "No nameservers added.");
		status = -1;
	}

 done:
	/* single cleanup path for both the buffer and the DLL handle */
	if (buf)
		free(buf);
	if (handle)
		FreeLibrary(handle);
	return status;
}
+
/* Read a REG_SZ-style value named `subkey' under `key' and feed it to
 * evdns_nameserver_ip_add_line(). Returns 0/positive from that helper
 * on success, -1 if the value is absent or cannot be read. */
static int
config_nameserver_from_reg_key(HKEY key, const char *subkey)
{
	char *buf;
	DWORD bufsz = 0, type = 0;
	int status = 0;

	/* probe with a zero-length buffer to learn the required size;
	 * ERROR_MORE_DATA is the expected "value exists" answer */
	if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
	    != ERROR_MORE_DATA)
		return -1;
	if (!(buf = malloc(bufsz)))
		return -1;

	/* NOTE(review): assumes the registry value is NUL-terminated
	 * string data (REG_SZ); `type' is fetched but never checked. */
	if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
	    == ERROR_SUCCESS && bufsz > 1) {
		status = evdns_nameserver_ip_add_line(buf);
	}

	free(buf);
	return status;
}
+
+#define SERVICES_KEY "System\\CurrentControlSet\\Services\\"
+#define WIN_NS_9X_KEY SERVICES_KEY "VxD\\MSTCP"
+#define WIN_NS_NT_KEY SERVICES_KEY "Tcpip\\Parameters"
+
+static int
+load_nameservers_from_registry(void)
+{
+ int found = 0;
+ int r;
+#define TRY(k, name) \
+ if (!found && config_nameserver_from_reg_key(k,name) == 0) { \
+ log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
+ found = 1; \
+ } else if (!found) { \
+ log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
+ #k,#name); \
+ }
+
+ if (((int)GetVersion()) > 0) { /* NT */
+ HKEY nt_key = 0, interfaces_key = 0;
+
+ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
+ KEY_READ, &nt_key) != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
+ return -1;
+ }
+ r = RegOpenKeyEx(nt_key, "Interfaces", 0,
+ KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
+ &interfaces_key);
+ if (r != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
+ return -1;
+ }
+ TRY(nt_key, "NameServer");
+ TRY(nt_key, "DhcpNameServer");
+ TRY(interfaces_key, "NameServer");
+ TRY(interfaces_key, "DhcpNameServer");
+ RegCloseKey(interfaces_key);
+ RegCloseKey(nt_key);
+ } else {
+ HKEY win_key = 0;
+ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
+ KEY_READ, &win_key) != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
+ return -1;
+ }
+ TRY(win_key, "NameServer");
+ RegCloseKey(win_key);
+ }
+
+ if (found == 0) {
+ log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
+ }
+
+ return found ? 0 : -1;
+#undef TRY
+}
+
/* Configure nameservers from Windows system settings: prefer the
 * IPHLPAPI GetNetworkParams interface, fall back to the registry. */
int
evdns_config_windows_nameservers(void)
{
	if (load_nameservers_with_getnetworkparams() != 0)
		return load_nameservers_from_registry();
	return 0;
}
+#endif
+
+int
+evdns_init(void)
+{
+ int res = 0;
+#ifdef WIN32
+ res = evdns_config_windows_nameservers();
+#else
+ res = evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+#endif
+
+ return (res);
+}
+
+const char *
+evdns_err_to_string(int err)
+{
+ switch (err) {
+ case DNS_ERR_NONE: return "no error";
+ case DNS_ERR_FORMAT: return "misformatted query";
+ case DNS_ERR_SERVERFAILED: return "server failed";
+ case DNS_ERR_NOTEXIST: return "name does not exist";
+ case DNS_ERR_NOTIMPL: return "query not implemented";
+ case DNS_ERR_REFUSED: return "refused";
+
+ case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
+ case DNS_ERR_UNKNOWN: return "unknown";
+ case DNS_ERR_TIMEOUT: return "request timed out";
+ case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
+ default: return "[Unknown error code]";
+ }
+}
+
/* Tear down all evdns state: fail or drop every pending request, close
 * and free every nameserver, and release the search configuration.
 * If fail_requests is non-zero, pending requests get DNS_ERR_SHUTDOWN. */
void
evdns_shutdown(int fail_requests)
{
	struct nameserver *server, *server_next;
	struct search_domain *dom, *dom_next;

	while (req_head) {
		if (fail_requests)
			reply_callback(req_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(req_head, &req_head);
	}
	while (req_waiting_head) {
		if (fail_requests)
			reply_callback(req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(req_waiting_head, &req_waiting_head);
	}
	global_requests_inflight = global_requests_waiting = 0;

	/* the nameserver list is circular: stop once the saved `next'
	 * pointer wraps back to the head */
	for (server = server_head; server; server = server_next) {
		server_next = server->next;
		if (server->socket >= 0)
			CLOSE_SOCKET(server->socket);
		(void) event_del(&server->event);
		if (server->state == 0)
			(void) event_del(&server->timeout_event);
		free(server);
		if (server_next == server_head)
			break;
	}
	server_head = NULL;
	global_good_nameservers = 0;

	/* free the search state directly instead of via search_state_decref;
	 * NOTE(review): this assumes request_finished() above released every
	 * per-request reference — confirm, otherwise this double-frees. */
	if (global_search_state) {
		for (dom = global_search_state->head; dom; dom = dom_next) {
			dom_next = dom->next;
			free(dom);
		}
		free(global_search_state);
		global_search_state = NULL;
	}
	evdns_log_fn = NULL;
}
+
+#ifdef EVDNS_MAIN
/* Demo resolve callback: print each returned address (A) or name (PTR);
 * `orig' carries the queried name. ttl and other types are ignored. */
void
main_callback(int result, char type, int count, int ttl,
    void *addrs, void *orig) {
	char *n = (char*)orig;
	int i;
	for (i = 0; i < count; ++i) {
		if (type == DNS_IPv4_A) {
			/* addrs is an array of u32 addresses for A replies */
			printf("%s: %s\n", n, debug_ntoa(((u32*)addrs)[i]));
		} else if (type == DNS_PTR) {
			/* addrs is an array of strings for PTR replies */
			printf("%s: %s\n", n, ((char**)addrs)[i]);
		}
	}
	if (!count) {
		printf("%s: No answer (%d)\n", n, result);
	}
	fflush(stdout);
}
/* Demo server callback for -servertest mode. */
void
evdns_server_callback(struct evdns_server_request *req, void *data)
{
	int i, r;
	(void)data;
	/* dummy; give 192.168.11.11 as an answer for all A questions,
	 *  give foo.bar.example.com as an answer for all PTR questions. */
	for (i = 0; i < req->nquestions; ++i) {
		u32 ans = htonl(0xc0a80b0bUL);	/* 192.168.11.11 */
		if (req->questions[i]->type == EVDNS_TYPE_A &&
		    req->questions[i]->dns_question_class == EVDNS_CLASS_INET) {
			printf(" -- replying for %s (A)\n", req->questions[i]->name);
			r = evdns_server_request_add_a_reply(req, req->questions[i]->name,
			    1, &ans, 10);
			if (r<0)
				printf("eeep, didn't work.\n");
		} else if (req->questions[i]->type == EVDNS_TYPE_PTR &&
		    req->questions[i]->dns_question_class == EVDNS_CLASS_INET) {
			printf(" -- replying for %s (PTR)\n", req->questions[i]->name);
			/* NOTE(review): unlike the A branch, the return value of
			 * add_ptr_reply is not checked here. */
			r = evdns_server_request_add_ptr_reply(req, NULL, req->questions[i]->name,
			    "foo.bar.example.com", 10);
		} else {
			printf(" -- skipping %s [%d %d]\n", req->questions[i]->name,
			    req->questions[i]->type, req->questions[i]->dns_question_class);
		}
	}

	r = evdns_request_respond(req, 0);
	if (r<0)
		printf("eeek, couldn't send reply.\n");
}
+
/* Log hook for evdns_set_log_fn(): forward every message, warning or
 * not, to stderr. */
void
logfn(int is_warn, const char *msg) {
	(void) is_warn;
	fputs(msg, stderr);
	fputc('\n', stderr);
}
/* Demo driver: resolve each command-line name (reverse with -x),
 * optionally running a dummy DNS server on UDP 10053 (-servertest). */
int
main(int c, char **v) {
	int idx;
	/* NOTE(review): verbose starts at 1, so "-v" is currently a no-op */
	int reverse = 0, verbose = 1, servertest = 0;
	if (c<2) {
		fprintf(stderr, "syntax: %s [-x] [-v] hostname\n", v[0]);
		fprintf(stderr, "syntax: %s [-servertest]\n", v[0]);
		return 1;
	}
	idx = 1;
	while (idx < c && v[idx][0] == '-') {
		if (!strcmp(v[idx], "-x"))
			reverse = 1;
		else if (!strcmp(v[idx], "-v"))
			verbose = 1;
		else if (!strcmp(v[idx], "-servertest"))
			servertest = 1;
		else
			fprintf(stderr, "Unknown option %s\n", v[idx]);
		++idx;
	}
	event_init();
	if (verbose)
		evdns_set_log_fn(logfn);
	evdns_resolv_conf_parse(DNS_OPTION_NAMESERVERS, "/etc/resolv.conf");
	if (servertest) {
		int sock;
		struct sockaddr_in my_addr;
		/* NOTE(review): socket() return value is not checked before use */
		sock = socket(PF_INET, SOCK_DGRAM, 0);
		evutil_make_socket_nonblocking(sock);
		my_addr.sin_family = AF_INET;
		my_addr.sin_port = htons(10053);
		my_addr.sin_addr.s_addr = INADDR_ANY;
		if (bind(sock, (struct sockaddr*)&my_addr, sizeof(my_addr))<0) {
			perror("bind");
			exit(1);
		}
		evdns_add_server_port(sock, 0, evdns_server_callback, NULL);
	}
	/* remaining arguments are names (or, with -x, IPv4 addresses) */
	for (; idx < c; ++idx) {
		if (reverse) {
			struct in_addr addr;
			if (!inet_aton(v[idx], &addr)) {
				fprintf(stderr, "Skipping non-IP %s\n", v[idx]);
				continue;
			}
			fprintf(stderr, "resolving %s...\n",v[idx]);
			evdns_resolve_reverse(&addr, 0, main_callback, v[idx]);
		} else {
			fprintf(stderr, "resolving (fwd) %s...\n",v[idx]);
			evdns_resolve_ipv4(v[idx], 0, main_callback, v[idx]);
		}
	}
	fflush(stdout);
	event_dispatch();
	return 0;
}
+#endif
diff --git a/libevent/evdns.h b/libevent/evdns.h
new file mode 100644
index 00000000000..1eb5c382480
--- /dev/null
+++ b/libevent/evdns.h
@@ -0,0 +1,528 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The original DNS code is due to Adam Langley with heavy
+ * modifications by Nick Mathewson. Adam put his DNS software in the
 * public domain. You can find his original copyright below. Please be
 * aware that the code as part of libevent is governed by the 3-clause
+ * BSD license above.
+ *
+ * This software is Public Domain. To view a copy of the public domain dedication,
+ * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
+ * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
+ *
+ * I ask and expect, but do not require, that all derivative works contain an
+ * attribution similar to:
+ * Parts developed by Adam Langley <agl@imperialviolet.org>
+ *
+ * You may wish to replace the word "Parts" with something else depending on
+ * the amount of original code.
+ *
+ * (Derivative works does not include programs which link against, run or include
+ * the source verbatim in their source distributions)
+ */
+
+/** @file evdns.h
+ *
+ * Welcome, gentle reader
+ *
+ * Async DNS lookups are really a whole lot harder than they should be,
+ * mostly stemming from the fact that the libc resolver has never been
+ * very good at them. Before you use this library you should see if libc
+ * can do the job for you with the modern async call getaddrinfo_a
+ * (see http://www.imperialviolet.org/page25.html#e498). Otherwise,
+ * please continue.
+ *
+ * This code is based on libevent and you must call event_init before
+ * any of the APIs in this file. You must also seed the OpenSSL random
+ * source if you are using OpenSSL for ids (see below).
+ *
+ * This library is designed to be included and shipped with your source
+ * code. You statically link with it. You should also test for the
+ * existence of strtok_r and define HAVE_STRTOK_R if you have it.
+ *
+ * The DNS protocol requires a good source of id numbers and these
+ * numbers should be unpredictable for spoofing reasons. There are
+ * three methods for generating them here and you must define exactly
+ * one of them. In increasing order of preference:
+ *
+ * DNS_USE_GETTIMEOFDAY_FOR_ID:
+ * Using the bottom 16 bits of the usec result from gettimeofday. This
+ * is a pretty poor solution but should work anywhere.
+ * DNS_USE_CPU_CLOCK_FOR_ID:
+ * Using the bottom 16 bits of the nsec result from the CPU's time
+ * counter. This is better, but may not work everywhere. Requires
+ * POSIX realtime support and you'll need to link against -lrt on
+ * glibc systems at least.
+ * DNS_USE_OPENSSL_FOR_ID:
+ * Uses the OpenSSL RAND_bytes call to generate the data. You must
+ * have seeded the pool before making any calls to this library.
+ *
+ * The library keeps track of the state of nameservers and will avoid
+ * them when they go down. Otherwise it will round robin between them.
+ *
+ * Quick start guide:
+ * #include "evdns.h"
+ * void callback(int result, char type, int count, int ttl,
+ * void *addresses, void *arg);
+ * evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+ * evdns_resolve("www.hostname.com", 0, callback, NULL);
+ *
+ * When the lookup is complete the callback function is called. The
+ * first argument will be one of the DNS_ERR_* defines in evdns.h.
+ * Hopefully it will be DNS_ERR_NONE, in which case type will be
+ * DNS_IPv4_A, count will be the number of IP addresses, ttl is the time
+ * which the data can be cached for (in seconds), addresses will point
+ * to an array of uint32_t's and arg will be whatever you passed to
+ * evdns_resolve.
+ *
+ * Searching:
+ *
+ * In order for this library to be a good replacement for glibc's resolver it
+ * supports searching. This involves setting a list of default domains, in
+ * which names will be queried for. The number of dots in the query name
+ * determines the order in which this list is used.
+ *
+ * Searching appears to be a single lookup from the point of view of the API,
+ * although many DNS queries may be generated from a single call to
+ * evdns_resolve. Searching can also drastically slow down the resolution
+ * of names.
+ *
+ * To disable searching:
+ * 1. Never set it up. If you never call evdns_resolv_conf_parse or
+ * evdns_search_add then no searching will occur.
+ *
+ * 2. If you do call evdns_resolv_conf_parse then don't pass
+ * DNS_OPTION_SEARCH (or DNS_OPTIONS_ALL, which implies it).
+ *
+ * 3. When calling evdns_resolve, pass the DNS_QUERY_NO_SEARCH flag.
+ *
+ * The order of searches depends on the number of dots in the name. If the
+ * number is greater than the ndots setting then the names is first tried
+ * globally. Otherwise each search domain is appended in turn.
+ *
+ * The ndots setting can either be set from a resolv.conf, or by calling
+ * evdns_search_ndots_set.
+ *
+ * For example, with ndots set to 1 (the default) and a search domain list of
+ * ["myhome.net"]:
+ * Query: www
+ * Order: www.myhome.net, www.
+ *
+ * Query: www.abc
+ * Order: www.abc., www.abc.myhome.net
+ *
+ * Internals:
+ *
+ * Requests are kept in two queues. The first is the inflight queue. In
+ * this queue requests have an allocated transaction id and nameserver.
+ * They will soon be transmitted if they haven't already been.
+ *
+ * The second is the waiting queue. The size of the inflight ring is
+ * limited and all other requests wait in waiting queue for space. This
+ * bounds the number of concurrent requests so that we don't flood the
+ * nameserver. Several algorithms require a full walk of the inflight
+ * queue and so bounding its size keeps thing going nicely under huge
+ * (many thousands of requests) loads.
+ *
+ * If a nameserver loses too many requests it is considered down and we
+ * try not to use it. After a while we send a probe to that nameserver
+ * (a lookup for google.com) and, if it replies, we consider it working
+ * again. If the nameserver fails a probe we wait longer to try again
+ * with the next probe.
+ */
+
+#ifndef EVENTDNS_H
+#define EVENTDNS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For integer types. */
+#include <evutil.h>
+
+/** Error codes 0-5 are as described in RFC 1035. */
+#define DNS_ERR_NONE 0
+/** The name server was unable to interpret the query */
+#define DNS_ERR_FORMAT 1
+/** The name server was unable to process this query due to a problem with the
+ * name server */
+#define DNS_ERR_SERVERFAILED 2
+/** The domain name does not exist */
+#define DNS_ERR_NOTEXIST 3
+/** The name server does not support the requested kind of query */
+#define DNS_ERR_NOTIMPL 4
/** The name server refuses to perform the specified operation for policy
+ * reasons */
+#define DNS_ERR_REFUSED 5
/** The reply was truncated or ill-formatted */
+#define DNS_ERR_TRUNCATED 65
+/** An unknown error occurred */
+#define DNS_ERR_UNKNOWN 66
+/** Communication with the server timed out */
+#define DNS_ERR_TIMEOUT 67
+/** The request was canceled because the DNS subsystem was shut down. */
+#define DNS_ERR_SHUTDOWN 68
+
+#define DNS_IPv4_A 1
+#define DNS_PTR 2
+#define DNS_IPv6_AAAA 3
+
+#define DNS_QUERY_NO_SEARCH 1
+
+#define DNS_OPTION_SEARCH 1
+#define DNS_OPTION_NAMESERVERS 2
+#define DNS_OPTION_MISC 4
+#define DNS_OPTIONS_ALL 7
+
+/**
+ * The callback that contains the results from a lookup.
+ * - type is either DNS_IPv4_A or DNS_PTR or DNS_IPv6_AAAA
+ * - count contains the number of addresses of the given type
+ * - ttl is the number of seconds the resolution may be cached for.
+ * - addresses needs to be cast according to type
+ */
+typedef void (*evdns_callback_type) (int result, char type, int count, int ttl, void *addresses, void *arg);
+
+/**
+ Initialize the asynchronous DNS library.
+
+ This function initializes support for non-blocking name resolution by
+ calling evdns_resolv_conf_parse() on UNIX and
+ evdns_config_windows_nameservers() on Windows.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_shutdown()
+ */
+int evdns_init(void);
+
+
+/**
+ Shut down the asynchronous DNS resolver and terminate all active requests.
+
+ If the 'fail_requests' option is enabled, all active requests will return
+ an empty result with the error flag set to DNS_ERR_SHUTDOWN. Otherwise,
+ the requests will be silently discarded.
+
+ @param fail_requests if zero, active requests will be aborted; if non-zero,
+ active requests will return DNS_ERR_SHUTDOWN.
+ @see evdns_init()
+ */
+void evdns_shutdown(int fail_requests);
+
+
+/**
+ Convert a DNS error code to a string.
+
+ @param err the DNS error code
+ @return a string containing an explanation of the error code
+*/
+const char *evdns_err_to_string(int err);
+
+
+/**
+ Add a nameserver.
+
+ The address should be an IPv4 address in network byte order.
+ The type of address is chosen so that it matches in_addr.s_addr.
+
+ @param address an IP address in network byte order
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_nameserver_ip_add()
+ */
+int evdns_nameserver_add(unsigned long int address);
+
+
+/**
+ Get the number of configured nameservers.
+
+ This returns the number of configured nameservers (not necessarily the
+ number of running nameservers). This is useful for double-checking
+ whether our calls to the various nameserver configuration functions
+ have been successful.
+
+ @return the number of configured nameservers
+ @see evdns_nameserver_add()
+ */
+int evdns_count_nameservers(void);
+
+
+/**
+ Remove all configured nameservers, and suspend all pending resolves.
+
+ Resolves will not necessarily be re-attempted until evdns_resume() is called.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resume()
+ */
+int evdns_clear_nameservers_and_suspend(void);
+
+
+/**
+ Resume normal operation and continue any suspended resolve requests.
+
+ Re-attempt resolves left in limbo after an earlier call to
+ evdns_clear_nameservers_and_suspend().
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_clear_nameservers_and_suspend()
+ */
+int evdns_resume(void);
+
+
+/**
+ Add a nameserver.
+
+ This wraps the evdns_nameserver_add() function by parsing a string as an IP
+ address and adds it as a nameserver.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_nameserver_add()
+ */
+int evdns_nameserver_ip_add(const char *ip_as_string);
+
+
+/**
+ Lookup an A record for a given name.
+
+ @param name a DNS hostname
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_ipv6(), evdns_resolve_reverse(), evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_ipv4(const char *name, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Lookup an AAAA record for a given name.
+
+ @param name a DNS hostname
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_ipv4(), evdns_resolve_reverse(), evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_ipv6(const char *name, int flags, evdns_callback_type callback, void *ptr);
+
+struct in_addr;
+struct in6_addr;
+
+/**
+ Lookup a PTR record for a given IP address.
+
+ @param in an IPv4 address
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Lookup a PTR record for a given IPv6 address.
+
+ @param in an IPv6 address
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Set the value of a configuration option.
+
+ The currently available configuration options are:
+
+ ndots, timeout, max-timeouts, max-inflight, and attempts
+
+ @param option the name of the configuration option to be modified
+ @param val the value to be set
+ @param flags either 0 | DNS_OPTION_SEARCH | DNS_OPTION_MISC
+ @return 0 if successful, or -1 if an error occurred
+ */
+int evdns_set_option(const char *option, const char *val, int flags);
+
+
+/**
+ Parse a resolv.conf file.
+
+ The 'flags' parameter determines what information is parsed from the
+ resolv.conf file. See the man page for resolv.conf for the format of this
+ file.
+
+ The following directives are not parsed from the file: sortlist, rotate,
+ no-check-names, inet6, debug.
+
+ If this function encounters an error, the possible return values are: 1 =
+ failed to open file, 2 = failed to stat file, 3 = file too large, 4 = out of
+ memory, 5 = short read from file, 6 = no nameservers listed in the file
+
+ @param flags any of DNS_OPTION_NAMESERVERS|DNS_OPTION_SEARCH|DNS_OPTION_MISC|
+ DNS_OPTIONS_ALL
+ @param filename the path to the resolv.conf file
+ @return 0 if successful, or various positive error codes if an error
+ occurred (see above)
+ @see resolv.conf(3), evdns_config_windows_nameservers()
+ */
+int evdns_resolv_conf_parse(int flags, const char *const filename);
+
+
+/**
+ Obtain nameserver information using the Windows API.
+
+ Attempt to configure a set of nameservers based on platform settings on
+ a win32 host. Preferentially tries to use GetNetworkParams; if that fails,
+ looks in the registry.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolv_conf_parse()
+ */
+#ifdef WIN32
+int evdns_config_windows_nameservers(void);
+#endif
+
+
+/**
+ Clear the list of search domains.
+ */
+void evdns_search_clear(void);
+
+
+/**
+ Add a domain to the list of search domains
+
+ @param domain the domain to be added to the search list
+ */
+void evdns_search_add(const char *domain);
+
+
+/**
+ Set the 'ndots' parameter for searches.
+
+ Sets the number of dots which, when found in a name, causes
+ the first query to be without any search domain.
+
+ @param ndots the new ndots parameter
+ */
+void evdns_search_ndots_set(const int ndots);
+
+/**
+ A callback that is invoked when a log message is generated
+
+ @param is_warning indicates if the log message is a 'warning'
+ @param msg the content of the log message
+ */
+typedef void (*evdns_debug_log_fn_type)(int is_warning, const char *msg);
+
+
+/**
+ Set the callback function to handle log messages.
+
+ @param fn the callback to be invoked when a log message is generated
+ */
+void evdns_set_log_fn(evdns_debug_log_fn_type fn);
+
+/**
+ Set a callback that will be invoked to generate transaction IDs. By
+ default, we pick transaction IDs based on the current clock time.
+
+ @param fn the new callback, or NULL to use the default.
+ */
+void evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void));
+
+#define DNS_NO_SEARCH 1
+
+/*
+ * Structures and functions used to implement a DNS server.
+ */
+
+struct evdns_server_request {
+ int flags;
+ int nquestions;
+ struct evdns_server_question **questions;
+};
+struct evdns_server_question {
+ int type;
+#ifdef __cplusplus
+ int dns_question_class;
+#else
+ /* You should refer to this field as "dns_question_class". The
+ * name "class" works in C for backward compatibility, and will be
+ * removed in a future version. (1.5 or later). */
+ int class;
+#define dns_question_class class
+#endif
+ char name[1];
+};
+typedef void (*evdns_request_callback_fn_type)(struct evdns_server_request *, void *);
+#define EVDNS_ANSWER_SECTION 0
+#define EVDNS_AUTHORITY_SECTION 1
+#define EVDNS_ADDITIONAL_SECTION 2
+
+#define EVDNS_TYPE_A 1
+#define EVDNS_TYPE_NS 2
+#define EVDNS_TYPE_CNAME 5
+#define EVDNS_TYPE_SOA 6
+#define EVDNS_TYPE_PTR 12
+#define EVDNS_TYPE_MX 15
+#define EVDNS_TYPE_TXT 16
+#define EVDNS_TYPE_AAAA 28
+
+#define EVDNS_QTYPE_AXFR 252
+#define EVDNS_QTYPE_ALL 255
+
+#define EVDNS_CLASS_INET 1
+
+struct evdns_server_port *evdns_add_server_port(int socket, int is_tcp, evdns_request_callback_fn_type callback, void *user_data);
+void evdns_close_server_port(struct evdns_server_port *port);
+
+int evdns_server_request_add_reply(struct evdns_server_request *req, int section, const char *name, int type, int dns_class, int ttl, int datalen, int is_name, const char *data);
+int evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl);
+int evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl);
+int evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl);
+int evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl);
+
+int evdns_server_request_respond(struct evdns_server_request *req, int err);
+int evdns_server_request_drop(struct evdns_server_request *req);
+struct sockaddr;
+int evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !EVENTDNS_H */
diff --git a/libevent/event-internal.h b/libevent/event-internal.h
new file mode 100644
index 00000000000..6436b3358bd
--- /dev/null
+++ b/libevent/event-internal.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENT_INTERNAL_H_
+#define _EVENT_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "config.h"
+#include "min_heap.h"
+#include "evsignal.h"
+
+struct eventop {
+ const char *name;
+ void *(*init)(struct event_base *);
+ int (*add)(void *, struct event *);
+ int (*del)(void *, struct event *);
+ int (*dispatch)(struct event_base *, void *, struct timeval *);
+ void (*dealloc)(struct event_base *, void *);
+ /* set if we need to reinitialize the event base */
+ int need_reinit;
+};
+
+struct event_base {
+ const struct eventop *evsel;
+ void *evbase;
+ int event_count; /* counts number of total events */
+ int event_count_active; /* counts number of active events */
+
+ int event_gotterm; /* Set to terminate loop */
+ int event_break; /* Set to terminate loop immediately */
+
+ /* active event management */
+ struct event_list **activequeues;
+ int nactivequeues;
+
+ /* signal handling info */
+ struct evsignal_info sig;
+
+ struct event_list eventqueue;
+ struct timeval event_tv;
+
+ struct min_heap timeheap;
+
+ struct timeval tv_cache;
+};
+
+/* Internal use only: Functions that might be missing from <sys/queue.h> */
+#ifndef HAVE_TAILQFOREACH
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_END(head) NULL
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+#define TAILQ_FOREACH(var, head, field) \
+ for((var) = TAILQ_FIRST(head); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_NEXT(var, field))
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (0)
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#endif /* TAILQ_FOREACH */
+
+int _evsignal_set_handler(struct event_base *base, int evsignal,
+ void (*fn)(int));
+int _evsignal_restore_handler(struct event_base *base, int evsignal);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVENT_INTERNAL_H_ */
diff --git a/libevent/event.3 b/libevent/event.3
new file mode 100644
index 00000000000..5b33ec64a93
--- /dev/null
+++ b/libevent/event.3
@@ -0,0 +1,624 @@
+.\" $OpenBSD: event.3,v 1.4 2002/07/12 18:50:48 provos Exp $
+.\"
+.\" Copyright (c) 2000 Artur Grabowski <art@openbsd.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 8, 2000
+.Dt EVENT 3
+.Os
+.Sh NAME
+.Nm event_init ,
+.Nm event_dispatch ,
+.Nm event_loop ,
+.Nm event_loopexit ,
+.Nm event_loopbreak ,
+.Nm event_set ,
+.Nm event_base_dispatch ,
+.Nm event_base_loop ,
+.Nm event_base_loopexit ,
+.Nm event_base_loopbreak ,
+.Nm event_base_set ,
+.Nm event_base_free ,
+.Nm event_add ,
+.Nm event_del ,
+.Nm event_once ,
+.Nm event_base_once ,
+.Nm event_pending ,
+.Nm event_initialized ,
+.Nm event_priority_init ,
+.Nm event_priority_set ,
+.Nm evtimer_set ,
+.Nm evtimer_add ,
+.Nm evtimer_del ,
+.Nm evtimer_pending ,
+.Nm evtimer_initialized ,
+.Nm signal_set ,
+.Nm signal_add ,
+.Nm signal_del ,
+.Nm signal_pending ,
+.Nm signal_initialized ,
+.Nm bufferevent_new ,
+.Nm bufferevent_free ,
+.Nm bufferevent_write ,
+.Nm bufferevent_write_buffer ,
+.Nm bufferevent_read ,
+.Nm bufferevent_enable ,
+.Nm bufferevent_disable ,
+.Nm bufferevent_settimeout ,
+.Nm bufferevent_base_set ,
+.Nm evbuffer_new ,
+.Nm evbuffer_free ,
+.Nm evbuffer_add ,
+.Nm evbuffer_add_buffer ,
+.Nm evbuffer_add_printf ,
+.Nm evbuffer_add_vprintf ,
+.Nm evbuffer_drain ,
+.Nm evbuffer_write ,
+.Nm evbuffer_read ,
+.Nm evbuffer_find ,
+.Nm evbuffer_readline ,
+.Nm evhttp_new ,
+.Nm evhttp_bind_socket ,
+.Nm evhttp_free
+.Nd execute a function when a specific event occurs
+.Sh SYNOPSIS
+.Fd #include <sys/time.h>
+.Fd #include <event.h>
+.Ft "struct event_base *"
+.Fn "event_init" "void"
+.Ft int
+.Fn "event_dispatch" "void"
+.Ft int
+.Fn "event_loop" "int flags"
+.Ft int
+.Fn "event_loopexit" "struct timeval *tv"
+.Ft int
+.Fn "event_loopbreak" "void"
+.Ft void
+.Fn "event_set" "struct event *ev" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg"
+.Ft int
+.Fn "event_base_dispatch" "struct event_base *base"
+.Ft int
+.Fn "event_base_loop" "struct event_base *base" "int flags"
+.Ft int
+.Fn "event_base_loopexit" "struct event_base *base" "struct timeval *tv"
+.Ft int
+.Fn "event_base_loopbreak" "struct event_base *base"
+.Ft int
+.Fn "event_base_set" "struct event_base *base" "struct event *"
+.Ft void
+.Fn "event_base_free" "struct event_base *base"
+.Ft int
+.Fn "event_add" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "event_del" "struct event *ev"
+.Ft int
+.Fn "event_once" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg" "struct timeval *tv"
+.Ft int
+.Fn "event_base_once" "struct event_base *base" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg" "struct timeval *tv"
+.Ft int
+.Fn "event_pending" "struct event *ev" "short event" "struct timeval *tv"
+.Ft int
+.Fn "event_initialized" "struct event *ev"
+.Ft int
+.Fn "event_priority_init" "int npriorities"
+.Ft int
+.Fn "event_priority_set" "struct event *ev" "int priority"
+.Ft void
+.Fn "evtimer_set" "struct event *ev" "void (*fn)(int, short, void *)" "void *arg"
+.Ft void
+.Fn "evtimer_add" "struct event *ev" "struct timeval *"
+.Ft void
+.Fn "evtimer_del" "struct event *ev"
+.Ft int
+.Fn "evtimer_pending" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "evtimer_initialized" "struct event *ev"
+.Ft void
+.Fn "signal_set" "struct event *ev" "int signal" "void (*fn)(int, short, void *)" "void *arg"
+.Ft void
+.Fn "signal_add" "struct event *ev" "struct timeval *"
+.Ft void
+.Fn "signal_del" "struct event *ev"
+.Ft int
+.Fn "signal_pending" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "signal_initialized" "struct event *ev"
+.Ft "struct bufferevent *"
+.Fn "bufferevent_new" "int fd" "evbuffercb readcb" "evbuffercb writecb" "everrorcb" "void *cbarg"
+.Ft void
+.Fn "bufferevent_free" "struct bufferevent *bufev"
+.Ft int
+.Fn "bufferevent_write" "struct bufferevent *bufev" "void *data" "size_t size"
+.Ft int
+.Fn "bufferevent_write_buffer" "struct bufferevent *bufev" "struct evbuffer *buf"
+.Ft size_t
+.Fn "bufferevent_read" "struct bufferevent *bufev" "void *data" "size_t size"
+.Ft int
+.Fn "bufferevent_enable" "struct bufferevent *bufev" "short event"
+.Ft int
+.Fn "bufferevent_disable" "struct bufferevent *bufev" "short event"
+.Ft void
+.Fn "bufferevent_settimeout" "struct bufferevent *bufev" "int timeout_read" "int timeout_write"
+.Ft int
+.Fn "bufferevent_base_set" "struct event_base *base" "struct bufferevent *bufev"
+.Ft "struct evbuffer *"
+.Fn "evbuffer_new" "void"
+.Ft void
+.Fn "evbuffer_free" "struct evbuffer *buf"
+.Ft int
+.Fn "evbuffer_add" "struct evbuffer *buf" "const void *data" "size_t size"
+.Ft int
+.Fn "evbuffer_add_buffer" "struct evbuffer *dst" "struct evbuffer *src"
+.Ft int
+.Fn "evbuffer_add_printf" "struct evbuffer *buf" "const char *fmt" "..."
+.Ft int
+.Fn "evbuffer_add_vprintf" "struct evbuffer *buf" "const char *fmt" "va_list ap"
+.Ft void
+.Fn "evbuffer_drain" "struct evbuffer *buf" "size_t size"
+.Ft int
+.Fn "evbuffer_write" "struct evbuffer *buf" "int fd"
+.Ft int
+.Fn "evbuffer_read" "struct evbuffer *buf" "int fd" "int size"
+.Ft "u_char *"
+.Fn "evbuffer_find" "struct evbuffer *buf" "const u_char *data" "size_t size"
+.Ft "char *"
+.Fn "evbuffer_readline" "struct evbuffer *buf"
+.Ft "struct evhttp *"
+.Fn "evhttp_new" "struct event_base *base"
+.Ft int
+.Fn "evhttp_bind_socket" "struct evhttp *http" "const char *address" "u_short port"
+.Ft "void"
+.Fn "evhttp_free" "struct evhttp *http"
+.Ft int
+.Fa (*event_sigcb)(void) ;
+.Ft volatile sig_atomic_t
+.Fa event_gotsig ;
+.Sh DESCRIPTION
+The
+.Nm event
+API provides a mechanism to execute a function when a specific event
+on a file descriptor occurs or after a given time has passed.
+.Pp
+The
+.Nm event
+API needs to be initialized with
+.Fn event_init
+before it can be used.
+.Pp
+In order to process events, an application needs to call
+.Fn event_dispatch .
+This function only returns on error, and should replace the event core
+of the application program.
+.Pp
+The function
+.Fn event_set
+prepares the event structure
+.Fa ev
+to be used in future calls to
+.Fn event_add
+and
+.Fn event_del .
+The event will be prepared to call the function specified by the
+.Fa fn
+argument with an
+.Fa int
+argument indicating the file descriptor, a
+.Fa short
+argument indicating the type of event, and a
+.Fa void *
+argument given in the
+.Fa arg
+argument.
+The
+.Fa fd
+indicates the file descriptor that should be monitored for events.
+The events can be either
+.Va EV_READ ,
+.Va EV_WRITE ,
+or both,
+indicating that an application can read or write from the file descriptor
+respectively without blocking.
+.Pp
+The function
+.Fa fn
+will be called with the file descriptor that triggered the event and
+the type of event which will be either
+.Va EV_TIMEOUT ,
+.Va EV_SIGNAL ,
+.Va EV_READ ,
+or
+.Va EV_WRITE .
+Additionally, an event which has registered interest in more than one of the
+preceding events, via bitwise-OR to
+.Fn event_set ,
+can provide its callback function with a bitwise-OR of more than one triggered
+event.
+The additional flag
+.Va EV_PERSIST
+makes an
+.Fn event_add
+persistent until
+.Fn event_del
+has been called.
+.Pp
+Once initialized, the
+.Fa ev
+structure can be used repeatedly with
+.Fn event_add
+and
+.Fn event_del
+and does not need to be reinitialized unless the function called and/or
+the argument to it are to be changed.
+However, when an
+.Fa ev
+structure has been added to libevent using
+.Fn event_add
+the structure must persist until the event occurs (assuming
+.Fa EV_PERSIST
+is not set) or is removed
+using
+.Fn event_del .
+You may not reuse the same
+.Fa ev
+structure for multiple monitored descriptors; each descriptor
+needs its own
+.Fa ev .
+.Pp
+The function
+.Fn event_add
+schedules the execution of the
+.Fa ev
+event when the event specified in
+.Fn event_set
+occurs or in at least the time specified in the
+.Fa tv .
+If
+.Fa tv
+is
+.Dv NULL ,
+no timeout occurs and the function will only be called
+if a matching event occurs on the file descriptor.
+The event in the
+.Fa ev
+argument must be already initialized by
+.Fn event_set
+and may not be used in calls to
+.Fn event_set
+until it has timed out or been removed with
+.Fn event_del .
+If the event in the
+.Fa ev
+argument already has a scheduled timeout, the old timeout will be
+replaced by the new one.
+.Pp
+The function
+.Fn event_del
+will cancel the event in the argument
+.Fa ev .
+If the event has already executed or has never been added
+the call will have no effect.
+.Pp
+The functions
+.Fn evtimer_set ,
+.Fn evtimer_add ,
+.Fn evtimer_del ,
+.Fn evtimer_initialized ,
+and
+.Fn evtimer_pending
+are abbreviations for common situations where only a timeout is required.
+The file descriptor passed will be \-1, and the event type will be
+.Va EV_TIMEOUT .
+.Pp
+The functions
+.Fn signal_set ,
+.Fn signal_add ,
+.Fn signal_del ,
+.Fn signal_initialized ,
+and
+.Fn signal_pending
+are abbreviations.
+The event type will be a persistent
+.Va EV_SIGNAL .
+That means
+.Fn signal_set
+adds
+.Va EV_PERSIST .
+.Pp
+In order to avoid races in signal handlers, the
+.Nm event
+API provides two variables:
+.Va event_sigcb
+and
+.Va event_gotsig .
+A signal handler
+sets
+.Va event_gotsig
+to indicate that a signal has been received.
+The application sets
+.Va event_sigcb
+to a callback function.
+After the signal handler sets
+.Va event_gotsig ,
+.Nm event_dispatch
+will execute the callback function to process received signals.
+The callback returns 1 when no events are registered any more.
+It can return \-1 to indicate an error to the
+.Nm event
+library, causing
+.Fn event_dispatch
+to terminate with
+.Va errno
+set to
+.Er EINTR .
+.Pp
+The function
+.Fn event_once
+is similar to
+.Fn event_set .
+However, it schedules a callback to be called exactly once and does not
+require the caller to prepare an
+.Fa event
+structure.
+This function supports
+.Fa EV_TIMEOUT ,
+.Fa EV_READ ,
+and
+.Fa EV_WRITE .
+.Pp
+The
+.Fn event_pending
+function can be used to check if the event specified by
+.Fa event
+is pending to run.
+If
+.Va EV_TIMEOUT
+was specified and
+.Fa tv
+is not
+.Dv NULL ,
+the expiration time of the event will be returned in
+.Fa tv .
+.Pp
+The
+.Fn event_initialized
+macro can be used to check if an event has been initialized.
+.Pp
+The
+.Nm event_loop
+function provides an interface for single pass execution of pending
+events.
+The flags
+.Va EVLOOP_ONCE
+and
+.Va EVLOOP_NONBLOCK
+are recognized.
+The
+.Nm event_loopexit
+function exits from the event loop. The next
+.Fn event_loop
+iteration after the
+given timer expires will complete normally (handling all queued events) then
+exit without blocking for events again. Subsequent invocations of
+.Fn event_loop
+will proceed normally.
+The
+.Nm event_loopbreak
+function exits from the event loop immediately.
+.Fn event_loop
+will abort after the next event is completed;
+.Fn event_loopbreak
+is typically invoked from this event's callback. This behavior is analogous
+to the "break;" statement. Subsequent invocations of
+.Fn event_loop
+will proceed normally.
+.Pp
+It is the responsibility of the caller to provide these functions with
+pre-allocated event structures.
+.Pp
+.Sh EVENT PRIORITIES
+By default
+.Nm libevent
+schedules all active events with the same priority.
+However, sometimes it is desirable to process some events with a higher
+priority than others.
+For that reason,
+.Nm libevent
+supports strict priority queues.
+Active events with a lower priority are always processed before events
+with a higher priority.
+.Pp
+The number of different priorities can be set initially with the
+.Fn event_priority_init
+function.
+This function should be called before the first call to
+.Fn event_dispatch .
+The
+.Fn event_priority_set
+function can be used to assign a priority to an event.
+By default,
+.Nm libevent
+assigns the middle priority to all events unless their priority
+is explicitly set.
+.Sh THREAD SAFE EVENTS
+.Nm Libevent
+has experimental support for thread-safe events.
+When initializing the library via
+.Fn event_init ,
+an event base is returned.
+This event base can be used in conjunction with calls to
+.Fn event_base_set ,
+.Fn event_base_dispatch ,
+.Fn event_base_loop ,
+.Fn event_base_loopexit ,
+.Fn bufferevent_base_set
+and
+.Fn event_base_free .
+.Fn event_base_set
+should be called after preparing an event with
+.Fn event_set ,
+as
+.Fn event_set
+assigns the provided event to the most recently created event base.
+.Fn bufferevent_base_set
+should be called after preparing a bufferevent with
+.Fn bufferevent_new .
+.Fn event_base_free
+should be used to free memory associated with the event base
+when it is no longer needed.
+.Sh BUFFERED EVENTS
+.Nm libevent
+provides an abstraction on top of the regular event callbacks.
+This abstraction is called a
+.Va "buffered event" .
+A buffered event provides input and output buffers that get filled
+and drained automatically.
+The user of a buffered event no longer deals directly with the IO,
+but instead is reading from input and writing to output buffers.
+.Pp
+A new bufferevent is created by
+.Fn bufferevent_new .
+The parameter
+.Fa fd
+specifies the file descriptor from which data is read and written to.
+This file descriptor is not allowed to be a
+.Xr pipe 2 .
+The next three parameters are callbacks.
+The read and write callback have the following form:
+.Ft void
+.Fn "(*cb)" "struct bufferevent *bufev" "void *arg" .
+The error callback has the following form:
+.Ft void
+.Fn "(*cb)" "struct bufferevent *bufev" "short what" "void *arg" .
+The argument is specified by the fourth parameter
+.Fa "cbarg" .
+A
+.Fa bufferevent struct
+pointer is returned on success, NULL on error.
+Both the read and the write callback may be NULL.
+The error callback has to be always provided.
+.Pp
+Once initialized, the bufferevent structure can be used repeatedly with
+bufferevent_enable() and bufferevent_disable().
+The flags parameter can be a combination of
+.Va EV_READ
+and
+.Va EV_WRITE .
+When read enabled the bufferevent will try to read from the file
+descriptor and call the read callback.
+The write callback is executed
+whenever the output buffer is drained below the write low watermark,
+which is
+.Va 0
+by default.
+.Pp
+The
+.Fn bufferevent_write
+function can be used to write data to the file descriptor.
+The data is appended to the output buffer and written to the descriptor
+automatically as it becomes available for writing.
+.Fn bufferevent_write
+returns 0 on success or \-1 on failure.
+The
+.Fn bufferevent_read
+function is used to read data from the input buffer,
+returning the amount of data read.
+.Pp
+If multiple bases are in use, bufferevent_base_set() must be called before
+enabling the bufferevent for the first time.
+.Sh NON-BLOCKING HTTP SUPPORT
+.Nm libevent
+provides a very thin HTTP layer that can be used both to host an HTTP
+server and also to make HTTP requests.
+An HTTP server can be created by calling
+.Fn evhttp_new .
+It can be bound to any port and address with the
+.Fn evhttp_bind_socket
+function.
+When the HTTP server is no longer used, it can be freed via
+.Fn evhttp_free .
+.Pp
+To be notified of HTTP requests, a user needs to register callbacks with the
+HTTP server.
+This can be done by calling
+.Fn evhttp_set_cb .
+The second argument is the URI for which a callback is being registered.
+The corresponding callback will receive an
+.Va struct evhttp_request
+object that contains all information about the request.
+.Pp
+This section does not document all the possible function calls; please
+check
+.Va event.h
+for the public interfaces.
+.Sh ADDITIONAL NOTES
+It is possible to disable support for
+.Va epoll , kqueue , devpoll , poll
+or
+.Va select
+by setting the environment variable
+.Va EVENT_NOEPOLL , EVENT_NOKQUEUE , EVENT_NODEVPOLL , EVENT_NOPOLL
+or
+.Va EVENT_NOSELECT ,
+respectively.
+By setting the environment variable
+.Va EVENT_SHOW_METHOD ,
+.Nm libevent
+displays the kernel notification method that it uses.
+.Sh RETURN VALUES
+Upon successful completion
+.Fn event_add
+and
+.Fn event_del
+return 0.
+Otherwise, \-1 is returned and the global variable errno is
+set to indicate the error.
+.Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
+.Xr evdns 3 ,
+.Xr timeout 9
+.Sh HISTORY
+The
+.Nm event
+API manpage is based on the
+.Xr timeout 9
+manpage by Artur Grabowski.
+The port of
+.Nm libevent
+to Windows is due to Michael A. Davis.
+Support for real-time signals is due to Taral.
+.Sh AUTHORS
+The
+.Nm event
+library was written by Niels Provos.
+.Sh BUGS
+This documentation is neither complete nor authoritative.
+If you are in doubt about the usage of this API then
+check the source code to find out how it works, write
+up the missing piece of documentation and send it to
+me for inclusion in this man page.
diff --git a/libevent/event.c b/libevent/event.c
new file mode 100644
index 00000000000..6eb5db05c87
--- /dev/null
+++ b/libevent/event.c
@@ -0,0 +1,1025 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef WIN32
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evutil.h"
+#include "log.h"
+
+#ifdef HAVE_EVENT_PORTS
+extern const struct eventop evportops;
+#endif
+#ifdef HAVE_SELECT
+extern const struct eventop selectops;
+#endif
+#ifdef HAVE_POLL
+extern const struct eventop pollops;
+#endif
+#ifdef HAVE_EPOLL
+extern const struct eventop epollops;
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+extern const struct eventop kqops;
+#endif
+#ifdef HAVE_DEVPOLL
+extern const struct eventop devpollops;
+#endif
+#ifdef WIN32
+extern const struct eventop win32ops;
+#endif
+
+/* In order of preference */
+static const struct eventop *eventops[] = {
+#ifdef HAVE_EVENT_PORTS
+ &evportops,
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+ &kqops,
+#endif
+#ifdef HAVE_EPOLL
+ &epollops,
+#endif
+#ifdef HAVE_DEVPOLL
+ &devpollops,
+#endif
+#ifdef HAVE_POLL
+ &pollops,
+#endif
+#ifdef HAVE_SELECT
+ &selectops,
+#endif
+#ifdef WIN32
+ &win32ops,
+#endif
+ NULL
+};
+
+/* Global state */
+struct event_base *current_base = NULL;
+extern struct event_base *evsignal_base;
+static int use_monotonic;
+
+/* Handle signals - This is a deprecated interface */
+int (*event_sigcb)(void); /* Signal callback when gotsig is set */
+volatile sig_atomic_t event_gotsig; /* Set in signal handler */
+
+/* Prototypes */
+static void event_queue_insert(struct event_base *, struct event *, int);
+static void event_queue_remove(struct event_base *, struct event *, int);
+static int event_haveevents(struct event_base *);
+
+static void event_process_active(struct event_base *);
+
+static int timeout_next(struct event_base *, struct timeval **);
+static void timeout_process(struct event_base *);
+static void timeout_correct(struct event_base *, struct timeval *);
+
+/* Probe once, at base creation, whether a monotonic clock is available;
+ * on success sets the file-global use_monotonic flag read by gettime()
+ * and timeout_correct(). */
+static void
+detect_monotonic(void)
+{
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
+		use_monotonic = 1;
+#endif
+}
+
+/* Fetch the current time into *tp, preferring (in order) the per-loop
+ * cache filled in event_base_loop(), the monotonic clock, and finally
+ * gettimeofday().  Returns 0 on success, -1 on clock failure. */
+static int
+gettime(struct event_base *base, struct timeval *tp)
+{
+	/* tv_cache is set right after dispatch and cleared (tv_sec = 0)
+	 * outside the loop, so a non-zero tv_sec means the cache is hot. */
+	if (base->tv_cache.tv_sec) {
+		*tp = base->tv_cache;
+		return (0);
+	}
+
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+	if (use_monotonic) {
+		struct timespec ts;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+			return (-1);
+
+		tp->tv_sec = ts.tv_sec;
+		tp->tv_usec = ts.tv_nsec / 1000;
+		return (0);
+	}
+#endif
+
+	return (evutil_gettimeofday(tp, NULL));
+}
+
+/* Legacy initializer: create a new event base and make it the global
+ * current_base used by the non-_base_ API functions. */
+struct event_base *
+event_init(void)
+{
+	struct event_base *base = event_base_new();
+
+	if (base != NULL)
+		current_base = base;
+
+	return (base);
+}
+
+/* Allocate and initialize an event base without touching current_base.
+ * Picks the first usable backend from the preference-ordered eventops[]
+ * table.  Never returns NULL: allocation or backend failure aborts via
+ * event_err/event_errx. */
+struct event_base *
+event_base_new(void)
+{
+	int i;
+	struct event_base *base;
+
+	if ((base = calloc(1, sizeof(struct event_base))) == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	/* Reset the deprecated global signal-callback state. */
+	event_sigcb = NULL;
+	event_gotsig = 0;
+
+	detect_monotonic();
+	gettime(base, &base->event_tv);
+
+	min_heap_ctor(&base->timeheap);
+	TAILQ_INIT(&base->eventqueue);
+	/* -1 marks the internal signal socketpair as not yet created. */
+	base->sig.ev_signal_pair[0] = -1;
+	base->sig.ev_signal_pair[1] = -1;
+
+	/* Try each backend in order until one initializes. */
+	base->evbase = NULL;
+	for (i = 0; eventops[i] && !base->evbase; i++) {
+		base->evsel = eventops[i];
+
+		base->evbase = base->evsel->init(base);
+	}
+
+	if (base->evbase == NULL)
+		event_errx(1, "%s: no event mechanism available", __func__);
+
+	if (getenv("EVENT_SHOW_METHOD"))
+		event_msgx("libevent using: %s\n",
+			   base->evsel->name);
+
+	/* allocate a single active event queue */
+	event_base_priority_init(base, 1);
+
+	return (base);
+}
+
+/* Tear down an event base: delete every remaining non-internal event
+ * (inserted, timed, and active), release the backend, and free the
+ * priority queues.  Passing NULL frees current_base. */
+void
+event_base_free(struct event_base *base)
+{
+	int i, n_deleted=0;
+	struct event *ev;
+
+	if (base == NULL && current_base)
+		base = current_base;
+	if (base == current_base)
+		current_base = NULL;
+
+	/* XXX(niels) - check for internal events first */
+	assert(base);
+	/* Delete all non-internal events. */
+	for (ev = TAILQ_FIRST(&base->eventqueue); ev; ) {
+		/* Grab the successor before event_del unlinks ev. */
+		struct event *next = TAILQ_NEXT(ev, ev_next);
+		if (!(ev->ev_flags & EVLIST_INTERNAL)) {
+			event_del(ev);
+			++n_deleted;
+		}
+		ev = next;
+	}
+	while ((ev = min_heap_top(&base->timeheap)) != NULL) {
+		event_del(ev);
+		++n_deleted;
+	}
+
+	for (i = 0; i < base->nactivequeues; ++i) {
+		for (ev = TAILQ_FIRST(base->activequeues[i]); ev; ) {
+			struct event *next = TAILQ_NEXT(ev, ev_active_next);
+			if (!(ev->ev_flags & EVLIST_INTERNAL)) {
+				event_del(ev);
+				++n_deleted;
+			}
+			ev = next;
+		}
+	}
+
+	if (n_deleted)
+		event_debug(("%s: %d events were still set in base",
+			__func__, n_deleted));
+
+	if (base->evsel->dealloc != NULL)
+		base->evsel->dealloc(base, base->evbase);
+
+	/* Only internal events may remain on the active queues here. */
+	for (i = 0; i < base->nactivequeues; ++i)
+		assert(TAILQ_EMPTY(base->activequeues[i]));
+
+	assert(min_heap_empty(&base->timeheap));
+	min_heap_dtor(&base->timeheap);
+
+	for (i = 0; i < base->nactivequeues; ++i)
+		free(base->activequeues[i]);
+	free(base->activequeues);
+
+	assert(TAILQ_EMPTY(&base->eventqueue));
+
+	free(base);
+}
+
+/* Reinitialize the event base after a fork.  Some backends (e.g. kqueue,
+ * epoll) do not survive fork(); this rebuilds the backend state and
+ * re-adds every inserted event.  Returns 0 on success or -1 if any
+ * event could not be re-added. */
+int
+event_reinit(struct event_base *base)
+{
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	int res = 0;
+	struct event *ev;
+
+	/* check if this event mechanism requires reinit */
+	if (!evsel->need_reinit)
+		return (0);
+
+	/* prevent internal delete */
+	if (base->sig.ev_signal_added) {
+		/* we cannot call event_del here because the base has
+		 * not been reinitialized yet. */
+		event_queue_remove(base, &base->sig.ev_signal,
+		    EVLIST_INSERTED);
+		if (base->sig.ev_signal.ev_flags & EVLIST_ACTIVE)
+			event_queue_remove(base, &base->sig.ev_signal,
+			    EVLIST_ACTIVE);
+		base->sig.ev_signal_added = 0;
+	}
+
+	if (base->evsel->dealloc != NULL)
+		base->evsel->dealloc(base, base->evbase);
+	evbase = base->evbase = evsel->init(base);
+	if (base->evbase == NULL)
+		event_errx(1, "%s: could not reinitialize event mechanism",
+		    __func__);
+
+	/* Re-register every inserted event with the fresh backend. */
+	TAILQ_FOREACH(ev, &base->eventqueue, ev_next) {
+		if (evsel->add(evbase, ev) == -1)
+			res = -1;
+	}
+
+	return (res);
+}
+
+/* Legacy wrapper: initialize priority queues on the global current_base. */
+int
+event_priority_init(int npriorities)
+{
+	return event_base_priority_init(current_base, npriorities);
+}
+
+/*
+ * Allocate npriorities active-event queues for base, replacing any
+ * existing set.  Returns -1 if events are currently active (queues
+ * cannot be resized then); aborts via event_err on allocation failure.
+ */
+int
+event_base_priority_init(struct event_base *base, int npriorities)
+{
+	int i;
+
+	if (base->event_count_active)
+		return (-1);
+
+	if (base->nactivequeues && npriorities != base->nactivequeues) {
+		for (i = 0; i < base->nactivequeues; ++i) {
+			free(base->activequeues[i]);
+		}
+		free(base->activequeues);
+	}
+
+	/* Allocate our priority queues.  calloc takes (count, element
+	 * size); the previous code passed npriorities * sizeof(...) as
+	 * the element size and over-allocated npriorities^2 slots. */
+	base->nactivequeues = npriorities;
+	base->activequeues = (struct event_list **)calloc(base->nactivequeues,
+	    sizeof(struct event_list *));
+	if (base->activequeues == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	for (i = 0; i < base->nactivequeues; ++i) {
+		base->activequeues[i] = malloc(sizeof(struct event_list));
+		if (base->activequeues[i] == NULL)
+			event_err(1, "%s: malloc", __func__);
+		TAILQ_INIT(base->activequeues[i]);
+	}
+
+	return (0);
+}
+
+/* Return non-zero if the base still tracks any (non-internal) events. */
+int
+event_haveevents(struct event_base *base)
+{
+	return (base->event_count > 0);
+}
+
+/*
+ * Active events are stored in priority queues.  Lower priorities are always
+ * processed before higher priorities.  Low priority events can starve high
+ * priority ones.
+ */
+
+static void
+event_process_active(struct event_base *base)
+{
+	struct event *ev;
+	struct event_list *activeq = NULL;
+	int i;
+	short ncalls;
+
+	/* Pick the first (i.e. highest-priority) non-empty queue. */
+	for (i = 0; i < base->nactivequeues; ++i) {
+		if (TAILQ_FIRST(base->activequeues[i]) != NULL) {
+			activeq = base->activequeues[i];
+			break;
+		}
+	}
+
+	/* Caller only invokes us when event_count_active != 0. */
+	assert(activeq != NULL);
+
+	for (ev = TAILQ_FIRST(activeq); ev; ev = TAILQ_FIRST(activeq)) {
+		if (ev->ev_events & EV_PERSIST)
+			event_queue_remove(base, ev, EVLIST_ACTIVE);
+		else
+			event_del(ev);
+
+		/* Allows deletes to work: event_del() zeroes *ev_pncalls
+		 * so a callback deleting its own event stops this loop. */
+		ncalls = ev->ev_ncalls;
+		ev->ev_pncalls = &ncalls;
+		while (ncalls) {
+			ncalls--;
+			ev->ev_ncalls = ncalls;
+			(*ev->ev_callback)((int)ev->ev_fd, ev->ev_res, ev->ev_arg);
+			if (event_gotsig || base->event_break)
+				return;
+		}
+	}
+}
+
+/*
+ * Wait continuously for events.  We exit only if no events are left.
+ */
+
+/* Legacy wrapper: run the loop on the global current_base. */
+int
+event_dispatch(void)
+{
+	return (event_loop(0));
+}
+
+/* Run the event loop on the given base until no events remain. */
+int
+event_base_dispatch(struct event_base *event_base)
+{
+	return (event_base_loop(event_base, 0));
+}
+
+/* Return the name of the kernel notification backend in use. */
+const char *
+event_base_get_method(struct event_base *base)
+{
+	assert(base);
+	return (base->evsel->name);
+}
+
+/* One-shot timer callback that flags the loop to terminate. */
+static void
+event_loopexit_cb(int fd, short what, void *arg)
+{
+	struct event_base *base = arg;
+	base->event_gotterm = 1;
+}
+
+/* not thread safe */
+/* Ask the current_base loop to exit after tv (NULL = immediately, via
+ * a zeroed one-shot timeout inside event_base_once). */
+int
+event_loopexit(const struct timeval *tv)
+{
+	return (event_once(-1, EV_TIMEOUT, event_loopexit_cb,
+		    current_base, tv));
+}
+
+/* Ask the given base's loop to exit after tv. */
+int
+event_base_loopexit(struct event_base *event_base, const struct timeval *tv)
+{
+	return (event_base_once(event_base, -1, EV_TIMEOUT, event_loopexit_cb,
+		    event_base, tv));
+}
+
+/* not thread safe */
+/* Break out of the current_base loop immediately. */
+int
+event_loopbreak(void)
+{
+	return (event_base_loopbreak(current_base));
+}
+
+/* Flag the base's loop to stop at the next callback boundary. */
+int
+event_base_loopbreak(struct event_base *event_base)
+{
+	if (event_base == NULL)
+		return (-1);
+
+	event_base->event_break = 1;
+	return (0);
+}
+
+
+
+/* not thread safe */
+
+int
+event_loop(int flags)
+{
+ return event_base_loop(current_base, flags);
+}
+
+/* Core event loop.  Repeatedly: handle termination/break/signal flags,
+ * compute the dispatch timeout, poll the backend, run expired timeouts,
+ * then process active events.  flags may contain EVLOOP_ONCE (return
+ * after one round of callbacks) and/or EVLOOP_NONBLOCK (poll without
+ * waiting).  Returns 0 on normal exit, 1 if no events were registered,
+ * -1 on backend or signal-callback error. */
+int
+event_base_loop(struct event_base *base, int flags)
+{
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	struct timeval tv;
+	struct timeval *tv_p;
+	int res, done;
+
+	/* clear time cache */
+	base->tv_cache.tv_sec = 0;
+
+	if (base->sig.ev_signal_added)
+		evsignal_base = base;
+	done = 0;
+	while (!done) {
+		/* Terminate the loop if we have been asked to */
+		if (base->event_gotterm) {
+			base->event_gotterm = 0;
+			break;
+		}
+
+		if (base->event_break) {
+			base->event_break = 0;
+			break;
+		}
+
+		/* You cannot use this interface for multi-threaded apps */
+		while (event_gotsig) {
+			event_gotsig = 0;
+			if (event_sigcb) {
+				res = (*event_sigcb)();
+				if (res == -1) {
+					errno = EINTR;
+					return (-1);
+				}
+			}
+		}
+
+		/* Compensate for wall-clock time running backwards
+		 * (no-op when the monotonic clock is in use). */
+		timeout_correct(base, &tv);
+
+		tv_p = &tv;
+		if (!base->event_count_active && !(flags & EVLOOP_NONBLOCK)) {
+			timeout_next(base, &tv_p);
+		} else {
+			/*
+			 * if we have active events, we just poll new events
+			 * without waiting.
+			 */
+			evutil_timerclear(&tv);
+		}
+
+		/* If we have no events, we just exit */
+		if (!event_haveevents(base)) {
+			event_debug(("%s: no events registered.", __func__));
+			return (1);
+		}
+
+		/* update last old time */
+		gettime(base, &base->event_tv);
+
+		/* clear time cache */
+		base->tv_cache.tv_sec = 0;
+
+		res = evsel->dispatch(base, evbase, tv_p);
+
+		if (res == -1)
+			return (-1);
+		/* Cache "now" so all callbacks this round see one time. */
+		gettime(base, &base->tv_cache);
+
+		timeout_process(base);
+
+		if (base->event_count_active) {
+			event_process_active(base);
+			if (!base->event_count_active && (flags & EVLOOP_ONCE))
+				done = 1;
+		} else if (flags & EVLOOP_NONBLOCK)
+			done = 1;
+	}
+
+	/* clear time cache */
+	base->tv_cache.tv_sec = 0;
+
+	event_debug(("%s: asked to terminate loop.", __func__));
+	return (0);
+}
+
+/* Sets up an event for processing once */
+
+/* Heap-allocated wrapper pairing a one-shot event with the user's
+ * callback and argument; freed by event_once_cb after firing. */
+struct event_once {
+	struct event ev;
+
+	void (*cb)(int, short, void *);
+	void *arg;
+};
+
+/* One-time callback, it deletes itself */
+
+static void
+event_once_cb(int fd, short events, void *arg)
+{
+	struct event_once *eonce = arg;
+
+	(*eonce->cb)(fd, events, eonce->arg);
+	free(eonce);
+}
+
+/* not threadsafe, event scheduled once. */
+/* Legacy wrapper: schedule a one-shot event on current_base. */
+int
+event_once(int fd, short events,
+    void (*callback)(int, short, void *), void *arg, const struct timeval *tv)
+{
+	return event_base_once(current_base, fd, events, callback, arg, tv);
+}
+
+/* Schedules an event once */
+/* Schedule callback(fd, events, arg) to fire exactly once: either a
+ * pure timeout (events == EV_TIMEOUT, tv == NULL meaning "now") or an
+ * EV_READ/EV_WRITE event with optional timeout.  Signals are rejected.
+ * Returns 0 on success, -1 on bad arguments or allocation failure. */
+int
+event_base_once(struct event_base *base, int fd, short events,
+    void (*callback)(int, short, void *), void *arg, const struct timeval *tv)
+{
+	struct event_once *eonce;
+	struct timeval etv;
+	int res;
+
+	/* We cannot support signals that just fire once */
+	if (events & EV_SIGNAL)
+		return (-1);
+
+	if ((eonce = calloc(1, sizeof(struct event_once))) == NULL)
+		return (-1);
+
+	eonce->cb = callback;
+	eonce->arg = arg;
+
+	if (events == EV_TIMEOUT) {
+		if (tv == NULL) {
+			/* NULL timeout means "fire immediately". */
+			evutil_timerclear(&etv);
+			tv = &etv;
+		}
+
+		evtimer_set(&eonce->ev, event_once_cb, eonce);
+	} else if (events & (EV_READ|EV_WRITE)) {
+		events &= EV_READ|EV_WRITE;
+
+		event_set(&eonce->ev, fd, events, event_once_cb, eonce);
+	} else {
+		/* Bad event combination */
+		free(eonce);
+		return (-1);
+	}
+
+	res = event_base_set(base, &eonce->ev);
+	if (res == 0)
+		res = event_add(&eonce->ev, tv);
+	if (res != 0) {
+		free(eonce);
+		return (res);
+	}
+
+	return (0);
+}
+
+/* Initialize an event structure with its fd, interest mask, callback
+ * and argument.  Does not register the event; call event_add() for
+ * that.  The event is bound to current_base until event_base_set(). */
+void
+event_set(struct event *ev, int fd, short events,
+	  void (*callback)(int, short, void *), void *arg)
+{
+	/* Take the current base - caller needs to set the real base later */
+	ev->ev_base = current_base;
+
+	ev->ev_callback = callback;
+	ev->ev_arg = arg;
+	ev->ev_fd = fd;
+	ev->ev_events = events;
+	ev->ev_res = 0;
+	ev->ev_flags = EVLIST_INIT;
+	ev->ev_ncalls = 0;
+	ev->ev_pncalls = NULL;
+
+	min_heap_elem_init(ev);
+
+	/* by default, we put new events into the middle priority */
+	if(current_base)
+		ev->ev_pri = current_base->nactivequeues/2;
+}
+
+/* Attach an event to a specific base (instead of current_base) and
+ * reset its priority to the base's middle queue.  Fails (-1) once the
+ * event has been added anywhere. */
+int
+event_base_set(struct event_base *base, struct event *ev)
+{
+	/* Only innocent events may be assigned to a different base */
+	if (ev->ev_flags != EVLIST_INIT)
+		return (-1);
+
+	ev->ev_base = base;
+	ev->ev_pri = base->nactivequeues/2;
+
+	return (0);
+}
+
+/*
+ * Sets the priority of an event - if an event is already scheduled
+ * changing the priority is going to fail.
+ */
+
+int
+event_priority_set(struct event *ev, int pri)
+{
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		return (-1);
+	/* Priority must index an existing active queue. */
+	if (pri < 0 || pri >= ev->ev_base->nactivequeues)
+		return (-1);
+
+	ev->ev_pri = pri;
+
+	return (0);
+}
+
+/*
+ * Checks if a specific event is pending or scheduled.
+ * Returns the subset of `event` flags (EV_TIMEOUT|EV_READ|EV_WRITE|
+ * EV_SIGNAL) that are currently pending on ev.  If tv is non-NULL and
+ * a timeout is pending, *tv receives the expiry as wall-clock time.
+ */
+
+int
+event_pending(struct event *ev, short event, struct timeval *tv)
+{
+	struct timeval now, res;
+	int flags = 0;
+
+	if (ev->ev_flags & EVLIST_INSERTED)
+		flags |= (ev->ev_events & (EV_READ|EV_WRITE|EV_SIGNAL));
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		flags |= ev->ev_res;
+	if (ev->ev_flags & EVLIST_TIMEOUT)
+		flags |= EV_TIMEOUT;
+
+	event &= (EV_TIMEOUT|EV_READ|EV_WRITE|EV_SIGNAL);
+
+	/* See if there is a timeout that we should report */
+	if (tv != NULL && (flags & event & EV_TIMEOUT)) {
+		/* ev_timeout is on the (possibly monotonic) loop clock;
+		 * convert the remaining delta back to wall-clock time. */
+		gettime(ev->ev_base, &now);
+		evutil_timersub(&ev->ev_timeout, &now, &res);
+		/* correctly remap to real time */
+		evutil_gettimeofday(&now, NULL);
+		evutil_timeradd(&now, &res, tv);
+	}
+
+	return (flags & event);
+}
+
+/* Register an event with its base.  tv, if non-NULL, (re)arms a
+ * timeout of that duration from now.  Returns 0 on success, -1 on
+ * backend or allocation failure, in which case no state is changed. */
+int
+event_add(struct event *ev, const struct timeval *tv)
+{
+	struct event_base *base = ev->ev_base;
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	int res = 0;
+
+	event_debug((
+		 "event_add: event: %p, %s%s%scall %p",
+		 ev,
+		 ev->ev_events & EV_READ ? "EV_READ " : " ",
+		 ev->ev_events & EV_WRITE ? "EV_WRITE " : " ",
+		 tv ? "EV_TIMEOUT " : " ",
+		 ev->ev_callback));
+
+	assert(!(ev->ev_flags & ~EVLIST_ALL));
+
+	/*
+	 * prepare for timeout insertion further below, if we get a
+	 * failure on any step, we should not change any state.
+	 */
+	if (tv != NULL && !(ev->ev_flags & EVLIST_TIMEOUT)) {
+		if (min_heap_reserve(&base->timeheap,
+			1 + min_heap_size(&base->timeheap)) == -1)
+			return (-1);  /* ENOMEM == errno */
+	}
+
+	if ((ev->ev_events & (EV_READ|EV_WRITE|EV_SIGNAL)) &&
+	    !(ev->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE))) {
+		res = evsel->add(evbase, ev);
+		if (res != -1)
+			event_queue_insert(base, ev, EVLIST_INSERTED);
+	}
+
+	/*
+	 * we should change the timeout state only if the previous event
+	 * addition succeeded.
+	 */
+	if (res != -1 && tv != NULL) {
+		struct timeval now;
+
+		/*
+		 * we already reserved memory above for the case where we
+		 * are not replacing an existing timeout.
+		 */
+		if (ev->ev_flags & EVLIST_TIMEOUT)
+			event_queue_remove(base, ev, EVLIST_TIMEOUT);
+
+		/* Check if it is active due to a timeout.  Rescheduling
+		 * this timeout before the callback can be executed
+		 * removes it from the active list. */
+		if ((ev->ev_flags & EVLIST_ACTIVE) &&
+		    (ev->ev_res & EV_TIMEOUT)) {
+			/* See if we are just active executing this
+			 * event in a loop
+			 */
+			if (ev->ev_ncalls && ev->ev_pncalls) {
+				/* Abort loop */
+				*ev->ev_pncalls = 0;
+			}
+
+			event_queue_remove(base, ev, EVLIST_ACTIVE);
+		}
+
+		gettime(base, &now);
+		evutil_timeradd(&now, tv, &ev->ev_timeout);
+
+		event_debug((
+			 "event_add: timeout in %ld seconds, call %p",
+			 tv->tv_sec, ev->ev_callback));
+
+		event_queue_insert(base, ev, EVLIST_TIMEOUT);
+	}
+
+	return (res);
+}
+
+/* Unregister an event from every queue it sits on (timeout, active,
+ * inserted) and, if it was inserted, from the backend.  Safe to call
+ * from the event's own callback.  Returns 0, or the backend's del()
+ * result; -1 for an event that was never added to a base. */
+int
+event_del(struct event *ev)
+{
+	struct event_base *base;
+	const struct eventop *evsel;
+	void *evbase;
+
+	event_debug(("event_del: %p, callback %p",
+		 ev, ev->ev_callback));
+
+	/* An event without a base has not been added */
+	if (ev->ev_base == NULL)
+		return (-1);
+
+	base = ev->ev_base;
+	evsel = base->evsel;
+	evbase = base->evbase;
+
+	assert(!(ev->ev_flags & ~EVLIST_ALL));
+
+	/* See if we are just active executing this event in a loop */
+	if (ev->ev_ncalls && ev->ev_pncalls) {
+		/* Abort loop */
+		*ev->ev_pncalls = 0;
+	}
+
+	if (ev->ev_flags & EVLIST_TIMEOUT)
+		event_queue_remove(base, ev, EVLIST_TIMEOUT);
+
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		event_queue_remove(base, ev, EVLIST_ACTIVE);
+
+	if (ev->ev_flags & EVLIST_INSERTED) {
+		event_queue_remove(base, ev, EVLIST_INSERTED);
+		return (evsel->del(evbase, ev));
+	}
+
+	return (0);
+}
+
+/* Mark an event as active with result flags `res`, to be delivered to
+ * its callback `ncalls` times.  If already active, only OR in the new
+ * result flags. */
+void
+event_active(struct event *ev, int res, short ncalls)
+{
+	/* We get different kinds of events, add them together */
+	if (ev->ev_flags & EVLIST_ACTIVE) {
+		ev->ev_res |= res;
+		return;
+	}
+
+	ev->ev_res = res;
+	ev->ev_ncalls = ncalls;
+	ev->ev_pncalls = NULL;
+	event_queue_insert(ev->ev_base, ev, EVLIST_ACTIVE);
+}
+
+/* Compute how long dispatch may block: the delay until the earliest
+ * pending timeout.  Sets *tv_p to NULL when no timeouts are pending
+ * (block indefinitely), or zeroes it when a timeout is already due.
+ * Returns 0 on success, -1 on clock failure. */
+static int
+timeout_next(struct event_base *base, struct timeval **tv_p)
+{
+	struct timeval now;
+	struct event *ev;
+	struct timeval *tv = *tv_p;
+
+	if ((ev = min_heap_top(&base->timeheap)) == NULL) {
+		/* if no time-based events are active wait for I/O */
+		*tv_p = NULL;
+		return (0);
+	}
+
+	if (gettime(base, &now) == -1)
+		return (-1);
+
+	if (evutil_timercmp(&ev->ev_timeout, &now, <=)) {
+		evutil_timerclear(tv);
+		return (0);
+	}
+
+	evutil_timersub(&ev->ev_timeout, &now, tv);
+
+	assert(tv->tv_sec >= 0);
+	assert(tv->tv_usec >= 0);
+
+	event_debug(("timeout_next: in %ld seconds", tv->tv_sec));
+	return (0);
+}
+
+/*
+ * Determines if the time is running backwards by comparing the current
+ * time against the last time we checked.  Not needed when using clock
+ * monotonic.
+ */
+
+static void
+timeout_correct(struct event_base *base, struct timeval *tv)
+{
+	struct event **pev;
+	unsigned int size;
+	struct timeval off;
+
+	if (use_monotonic)
+		return;
+
+	/* Check if time is running backwards */
+	gettime(base, tv);
+	if (evutil_timercmp(tv, &base->event_tv, >=)) {
+		base->event_tv = *tv;
+		return;
+	}
+
+	event_debug(("%s: time is running backwards, corrected",
+		    __func__));
+	evutil_timersub(&base->event_tv, tv, &off);
+
+	/*
+	 * We can modify the key element of the node without destroying
+	 * the key, because we apply it to all in the right order.
+	 */
+	pev = base->timeheap.p;
+	size = base->timeheap.n;
+	for (; size-- > 0; ++pev) {
+		struct timeval *ev_tv = &(**pev).ev_timeout;
+		evutil_timersub(ev_tv, &off, ev_tv);
+	}
+	/* Now remember what the new time turned out to be. */
+	base->event_tv = *tv;
+}
+
+/* Activate every event whose timeout has expired: pop due entries off
+ * the min-heap, delete them from the I/O queues, and queue them active
+ * with EV_TIMEOUT. */
+void
+timeout_process(struct event_base *base)
+{
+	struct timeval now;
+	struct event *ev;
+
+	if (min_heap_empty(&base->timeheap))
+		return;
+
+	gettime(base, &now);
+
+	while ((ev = min_heap_top(&base->timeheap))) {
+		/* Heap is ordered by expiry; first future entry ends it. */
+		if (evutil_timercmp(&ev->ev_timeout, &now, >))
+			break;
+
+		/* delete this event from the I/O queues */
+		event_del(ev);
+
+		event_debug(("timeout_process: call %p",
+			 ev->ev_callback));
+		event_active(ev, EV_TIMEOUT, 1);
+	}
+}
+
+/* Remove ev from one internal queue (EVLIST_INSERTED, _ACTIVE, or
+ * _TIMEOUT), clearing the matching flag and updating the counters.
+ * Aborts if ev is not actually on that queue. */
+void
+event_queue_remove(struct event_base *base, struct event *ev, int queue)
+{
+	if (!(ev->ev_flags & queue))
+		event_errx(1, "%s: %p(fd %d) not on queue %x", __func__,
+			   ev, ev->ev_fd, queue);
+
+	/* Internal events are excluded from the public event count. */
+	if (~ev->ev_flags & EVLIST_INTERNAL)
+		base->event_count--;
+
+	ev->ev_flags &= ~queue;
+	switch (queue) {
+	case EVLIST_INSERTED:
+		TAILQ_REMOVE(&base->eventqueue, ev, ev_next);
+		break;
+	case EVLIST_ACTIVE:
+		base->event_count_active--;
+		TAILQ_REMOVE(base->activequeues[ev->ev_pri],
+		    ev, ev_active_next);
+		break;
+	case EVLIST_TIMEOUT:
+		min_heap_erase(&base->timeheap, ev);
+		break;
+	default:
+		event_errx(1, "%s: unknown queue %x", __func__, queue);
+	}
+}
+
+/* Insert ev into one internal queue (EVLIST_INSERTED, _ACTIVE, or
+ * _TIMEOUT), setting the matching flag and updating the counters.
+ * Re-activating an already-active event is a silent no-op; any other
+ * double insertion aborts. */
+void
+event_queue_insert(struct event_base *base, struct event *ev, int queue)
+{
+	if (ev->ev_flags & queue) {
+		/* Double insertion is possible for active events */
+		if (queue & EVLIST_ACTIVE)
+			return;
+
+		event_errx(1, "%s: %p(fd %d) already on queue %x", __func__,
+			   ev, ev->ev_fd, queue);
+	}
+
+	/* Internal events are excluded from the public event count. */
+	if (~ev->ev_flags & EVLIST_INTERNAL)
+		base->event_count++;
+
+	ev->ev_flags |= queue;
+	switch (queue) {
+	case EVLIST_INSERTED:
+		TAILQ_INSERT_TAIL(&base->eventqueue, ev, ev_next);
+		break;
+	case EVLIST_ACTIVE:
+		base->event_count_active++;
+		TAILQ_INSERT_TAIL(base->activequeues[ev->ev_pri],
+		    ev,ev_active_next);
+		break;
+	case EVLIST_TIMEOUT: {
+		min_heap_push(&base->timeheap, ev);
+		break;
+	}
+	default:
+		event_errx(1, "%s: unknown queue %x", __func__, queue);
+	}
+}
+
+/* Functions for debugging */
+
+/* Return the compile-time VERSION string of this libevent build. */
+const char *
+event_get_version(void)
+{
+	return (VERSION);
+}
+
+/*
+ * No thread-safe interface needed - the information should be the same
+ * for all threads.
+ */
+
+/* Return the backend name of the global current_base. */
+const char *
+event_get_method(void)
+{
+	return (current_base->evsel->name);
+}
diff --git a/libevent/event.h b/libevent/event.h
new file mode 100644
index 00000000000..039e4f88bcb
--- /dev/null
+++ b/libevent/event.h
@@ -0,0 +1,1175 @@
+/*
+ * Copyright (c) 2000-2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENT_H_
+#define _EVENT_H_
+
+/** @mainpage
+
+ @section intro Introduction
+
+ libevent is an event notification library for developing scalable network
+ servers. The libevent API provides a mechanism to execute a callback
+ function when a specific event occurs on a file descriptor or after a
+ timeout has been reached. Furthermore, libevent also supports callbacks due
+ to signals or regular timeouts.
+
+ libevent is meant to replace the event loop found in event driven network
+ servers. An application just needs to call event_dispatch() and then add or
+ remove events dynamically without having to change the event loop.
+
+ Currently, libevent supports /dev/poll, kqueue(2), select(2), poll(2) and
+ epoll(4). It also has experimental support for real-time signals. The
+ internal event mechanism is completely independent of the exposed event API,
+ and a simple update of libevent can provide new functionality without having
+ to redesign the applications. As a result, Libevent allows for portable
+ application development and provides the most scalable event notification
+ mechanism available on an operating system. Libevent can also be used for
+ multi-threaded applications; see Steven Grimm's explanation. Libevent should
+ compile on Linux, *BSD, Mac OS X, Solaris and Windows.
+
+ @section usage Standard usage
+
+ Every program that uses libevent must include the <event.h> header, and pass
+ the -levent flag to the linker. Before using any of the functions in the
+ library, you must call event_init() or event_base_new() to perform one-time
+ initialization of the libevent library.
+
+ @section event Event notification
+
+ For each file descriptor that you wish to monitor, you must declare an event
+ structure and call event_set() to initialize the members of the structure.
+ To enable notification, you add the structure to the list of monitored
+ events by calling event_add(). The event structure must remain allocated as
+ long as it is active, so it should be allocated on the heap. Finally, you
+ call event_dispatch() to loop and dispatch events.
+
+ @section bufferevent I/O Buffers
+
+ libevent provides an abstraction on top of the regular event callbacks. This
+ abstraction is called a buffered event. A buffered event provides input and
+ output buffers that get filled and drained automatically. The user of a
+ buffered event no longer deals directly with the I/O, but instead is reading
+ from input and writing to output buffers.
+
+ Once initialized via bufferevent_new(), the bufferevent structure can be
+ used repeatedly with bufferevent_enable() and bufferevent_disable().
+ Instead of reading and writing directly to a socket, you would call
+ bufferevent_read() and bufferevent_write().
+
+ When read enabled the bufferevent will try to read from the file descriptor
+ and call the read callback. The write callback is executed whenever the
+ output buffer is drained below the write low watermark, which is 0 by
+ default.
+
+ @section timers Timers
+
+ libevent can also be used to create timers that invoke a callback after a
+ certain amount of time has expired. The evtimer_set() function prepares an
+ event struct to be used as a timer. To activate the timer, call
+ evtimer_add(). Timers can be deactivated by calling evtimer_del().
+
+ @section timeouts Timeouts
+
+ In addition to simple timers, libevent can assign timeout events to file
+ descriptors that are triggered whenever a certain amount of time has passed
+ with no activity on a file descriptor. The timeout_set() function
+ initializes an event struct for use as a timeout. Once initialized, the
+ event must be activated by using timeout_add(). To cancel the timeout, call
+ timeout_del().
+
+ @section evdns Asynchronous DNS resolution
+
+ libevent provides an asynchronous DNS resolver that should be used instead
+ of the standard DNS resolver functions. These functions can be imported by
+ including the <evdns.h> header in your program. Before using any of the
+ resolver functions, you must call evdns_init() to initialize the library. To
+ convert a hostname to an IP address, you call the evdns_resolve_ipv4()
+ function. To perform a reverse lookup, you would call the
+ evdns_resolve_reverse() function. All of these functions use callbacks to
+ avoid blocking while the lookup is performed.
+
+ @section evhttp Event-driven HTTP servers
+
+ libevent provides a very simple event-driven HTTP server that can be
+ embedded in your program and used to service HTTP requests.
+
+ To use this capability, you need to include the <evhttp.h> header in your
+ program. You create the server by calling evhttp_new(). Add addresses and
+ ports to listen on with evhttp_bind_socket(). You then register one or more
+ callbacks to handle incoming requests. Each URI can be assigned a callback
+ via the evhttp_set_cb() function. A generic callback function can also be
+ registered via evhttp_set_gencb(); this callback will be invoked if no other
+ callbacks have been registered for a given URI.
+
+ @section evrpc A framework for RPC servers and clients
+
+ libevent provides a framework for creating RPC servers and clients. It
+ takes care of marshaling and unmarshaling all data structures.
+
+ @section api API Reference
+
+ To browse the complete documentation of the libevent API, click on any of
+ the following links.
+
+ event.h
+ The primary libevent header
+
+ evdns.h
+ Asynchronous DNS resolution
+
+ evhttp.h
+ An embedded libevent-based HTTP server
+
+ evrpc.h
+ A framework for creating RPC servers and clients
+
+ */
+
+/** @file event.h
+
+ A library for writing event-driven network servers
+
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <config.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#include <stdarg.h>
+
+/* For int types. */
+#include <evutil.h>
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+#endif
+
+/* EVLIST_* — which internal queues/states an event occupies (ev_flags). */
+#define EVLIST_TIMEOUT	0x01
+#define EVLIST_INSERTED	0x02
+#define EVLIST_SIGNAL	0x04
+#define EVLIST_ACTIVE	0x08
+#define EVLIST_INTERNAL	0x10
+#define EVLIST_INIT	0x80
+
+/* EVLIST_X_ Private space: 0x1000-0xf000 */
+#define EVLIST_ALL	(0xf000 | 0x9f)
+
+/* EV_* — event types a caller can request / receive in callbacks. */
+#define EV_TIMEOUT	0x01
+#define EV_READ		0x02
+#define EV_WRITE	0x04
+#define EV_SIGNAL	0x08
+#define EV_PERSIST	0x10	/* Persistent event */
+
+/* Fix so that ppl dont have to run with <sys/queue.h> */
+#ifndef TAILQ_ENTRY
+#define _EVENT_DEFINED_TQENTRY
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+}
+#endif /* !TAILQ_ENTRY */
+
+struct event_base;
+/* A single monitored event: links into the base's inserted, active and
+ * signal queues, plus timeout-heap bookkeeping.  Must stay allocated
+ * while registered. */
+struct event {
+	TAILQ_ENTRY (event) ev_next;
+	TAILQ_ENTRY (event) ev_active_next;
+	TAILQ_ENTRY (event) ev_signal_next;
+	unsigned int min_heap_idx;	/* for managing timeouts */
+
+	struct event_base *ev_base;
+
+	int ev_fd;
+	short ev_events;		/* EV_* interest mask */
+	short ev_ncalls;
+	short *ev_pncalls;	/* Allows deletes in callback */
+
+	struct timeval ev_timeout;
+
+	int ev_pri;		/* smaller numbers are higher priority */
+
+	void (*ev_callback)(int, short, void *arg);
+	void *ev_arg;
+
+	int ev_res;		/* result passed to event callback */
+	int ev_flags;		/* EVLIST_* queue-membership bits */
+};
+
+#define EVENT_SIGNAL(ev) (int)(ev)->ev_fd
+#define EVENT_FD(ev) (int)(ev)->ev_fd
+
+/*
+ * Key-Value pairs. Can be used for HTTP headers but also for
+ * query argument parsing.
+ */
+struct evkeyval {
+ TAILQ_ENTRY(evkeyval) next; /* linkage in the containing evkeyvalq list */
+
+ char *key; /* NUL-terminated key string */
+ char *value; /* NUL-terminated value string */
+};
+
+#ifdef _EVENT_DEFINED_TQENTRY
+#undef TAILQ_ENTRY
+struct event_list;
+struct evkeyvalq;
+#undef _EVENT_DEFINED_TQENTRY
+#else
+TAILQ_HEAD (event_list, event);
+TAILQ_HEAD (evkeyvalq, evkeyval);
+#endif /* _EVENT_DEFINED_TQENTRY */
+
+/**
+ Initialize the event API.
+
+ event_base_new() initializes a new event base but does not set
+ the current_base global. If only event_base_new() is used, each event
+ added must have an event base assigned with event_base_set().
+
+ @see event_base_set(), event_base_free(), event_init()
+ */
+struct event_base *event_base_new(void);
+
+/**
+ Initialize the event API.
+
+ The event API needs to be initialized with event_init() before it can be
+ used. Sets the current_base global representing the default base for
+ events that have no base associated with them.
+
+ @see event_base_set(), event_base_new()
+ */
+struct event_base *event_init(void);
+
+/**
+ Reinitialize the event base after a fork
+
+ Some event mechanisms do not survive across fork. The event base needs
+ to be reinitialized with the event_reinit() function.
+
+ @param base the event base that needs to be re-initialized
+ @return 0 if successful, or -1 if some events could not be re-added.
+ @see event_base_new(), event_init()
+*/
+int event_reinit(struct event_base *base);
+
+/**
+ Loop to process events.
+
+ In order to process events, an application needs to call
+ event_dispatch(). This function only returns on error, and should
+ replace the event core of the application program.
+
+ @see event_base_dispatch()
+ */
+int event_dispatch(void);
+
+
+/**
+ Threadsafe event dispatching loop.
+
+ @param eb the event_base structure returned by event_init()
+ @see event_init(), event_dispatch()
+ */
+int event_base_dispatch(struct event_base *);
+
+
+/**
+ Get the kernel event notification mechanism used by libevent.
+
+ @param eb the event_base structure returned by event_base_new()
+ @return a string identifying the kernel event mechanism (kqueue, epoll, etc.)
+ */
+const char *event_base_get_method(struct event_base *);
+
+
+/**
+ Deallocate all memory associated with an event_base, and free the base.
+
+ Note that this function will not close any fds or free any memory passed
+ to event_set as the argument to callback.
+
+ @param eb an event_base to be freed
+ */
+void event_base_free(struct event_base *);
+
+
+#define _EVENT_LOG_DEBUG 0
+#define _EVENT_LOG_MSG 1
+#define _EVENT_LOG_WARN 2
+#define _EVENT_LOG_ERR 3
+typedef void (*event_log_cb)(int severity, const char *msg);
+/**
+ Redirect libevent's log messages.
+
+ @param cb a function taking two arguments: an integer severity between
+ _EVENT_LOG_DEBUG and _EVENT_LOG_ERR, and a string. If cb is NULL,
+ then the default log is used.
+ */
+void event_set_log_callback(event_log_cb cb);
+
+/**
+ Associate a different event base with an event.
+
+ @param eb the event base
+ @param ev the event
+ */
+int event_base_set(struct event_base *, struct event *);
+
+/**
+ event_loop() flags
+ */
+/*@{*/
+#define EVLOOP_ONCE 0x01 /**< Block at most once. */
+#define EVLOOP_NONBLOCK 0x02 /**< Do not block. */
+/*@}*/
+
+/**
+ Handle events.
+
+ This is a more flexible version of event_dispatch().
+
+ @param flags any combination of EVLOOP_ONCE | EVLOOP_NONBLOCK
+ @return 0 if successful, -1 if an error occurred, or 1 if no events were
+ registered.
+ @see event_loopexit(), event_base_loop()
+*/
+int event_loop(int);
+
+/**
+ Handle events (threadsafe version).
+
+ This is a more flexible version of event_base_dispatch().
+
+ @param eb the event_base structure returned by event_init()
+ @param flags any combination of EVLOOP_ONCE | EVLOOP_NONBLOCK
+ @return 0 if successful, -1 if an error occurred, or 1 if no events were
+ registered.
+ @see event_loopexit(), event_base_loop()
+ */
+int event_base_loop(struct event_base *, int);
+
+/**
+ Exit the event loop after the specified time.
+
+ The next event_loop() iteration after the given timer expires will
+ complete normally (handling all queued events) then exit without
+ blocking for events again.
+
+ Subsequent invocations of event_loop() will proceed normally.
+
+ @param tv the amount of time after which the loop should terminate.
+ @return 0 if successful, or -1 if an error occurred
+ @see event_loop(), event_base_loop(), event_base_loopexit()
+ */
+int event_loopexit(const struct timeval *);
+
+
+/**
+ Exit the event loop after the specified time (threadsafe variant).
+
+ The next event_base_loop() iteration after the given timer expires will
+ complete normally (handling all queued events) then exit without
+ blocking for events again.
+
+ Subsequent invocations of event_base_loop() will proceed normally.
+
+ @param eb the event_base structure returned by event_init()
+ @param tv the amount of time after which the loop should terminate.
+ @return 0 if successful, or -1 if an error occurred
+ @see event_loopexit()
+ */
+int event_base_loopexit(struct event_base *, const struct timeval *);
+
+/**
+ Abort the active event_loop() immediately.
+
+ event_loop() will abort the loop after the next event is completed;
+ event_loopbreak() is typically invoked from this event's callback.
+ This behavior is analogous to the "break;" statement.
+
+ Subsequent invocations of event_loop() will proceed normally.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_loopbreak(), event_loopexit()
+ */
+int event_loopbreak(void);
+
+/**
+ Abort the active event_base_loop() immediately.
+
+ event_base_loop() will abort the loop after the next event is completed;
+ event_base_loopbreak() is typically invoked from this event's callback.
+ This behavior is analogous to the "break;" statement.
+
+ Subsequent invocations of event_base_loop() will proceed normally.
+
+ @param eb the event_base structure returned by event_init()
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_loopexit
+ */
+int event_base_loopbreak(struct event_base *);
+
+
+/**
+ Add a timer event.
+
+ @param ev the event struct
+ @param tv timeval struct
+ */
+#define evtimer_add(ev, tv) event_add(ev, tv)
+
+
+/**
+ Define a timer event.
+
+ @param ev event struct to be modified
+ @param cb callback function
+ @param arg argument that will be passed to the callback function
+ */
+#define evtimer_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg)
+
+
+/**
+ * Delete a timer event.
+ *
+ * @param ev the event struct to be disabled
+ */
+#define evtimer_del(ev) event_del(ev)
+#define evtimer_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv)
+#define evtimer_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/**
+ * Add a timeout event.
+ *
+ * @param ev the event struct to be disabled
+ * @param tv the timeout value, in seconds
+ */
+#define timeout_add(ev, tv) event_add(ev, tv)
+
+
+/**
+ * Define a timeout event.
+ *
+ * @param ev the event struct to be defined
+ * @param cb the callback to be invoked when the timeout expires
+ * @param arg the argument to be passed to the callback
+ */
+#define timeout_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg)
+
+
+/**
+ * Disable a timeout event.
+ *
+ * @param ev the timeout event to be disabled
+ */
+#define timeout_del(ev) event_del(ev)
+
+#define timeout_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv)
+#define timeout_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/*
+ * Convenience wrappers for signal events.  signal_set() configures ev to
+ * monitor signal number x as a persistent event (EV_SIGNAL|EV_PERSIST);
+ * the remaining macros mirror the generic event_add/del/pending calls.
+ */
+#define signal_add(ev, tv) event_add(ev, tv)
+#define signal_set(ev, x, cb, arg) \
+ event_set(ev, x, EV_SIGNAL|EV_PERSIST, cb, arg)
+#define signal_del(ev) event_del(ev)
+#define signal_pending(ev, tv) event_pending(ev, EV_SIGNAL, tv)
+#define signal_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/**
+ Prepare an event structure to be added.
+
+ The function event_set() prepares the event structure ev to be used in
+ future calls to event_add() and event_del(). The event will be prepared to
+ call the function specified by the fn argument with an int argument
+ indicating the file descriptor, a short argument indicating the type of
+ event, and a void * argument given in the arg argument. The fd indicates
+ the file descriptor that should be monitored for events. The events can be
+ either EV_READ, EV_WRITE, or both, indicating that an application can read
+ or write from the file descriptor respectively without blocking.
+
+ The function fn will be called with the file descriptor that triggered the
+ event and the type of event which will be either EV_TIMEOUT, EV_SIGNAL,
+ EV_READ, or EV_WRITE. The additional flag EV_PERSIST makes an event_add()
+ persistent until event_del() has been called.
+
+ @param ev an event struct to be modified
+ @param fd the file descriptor to be monitored
+ @param event desired events to monitor; can be EV_READ and/or EV_WRITE
+ @param fn callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+
+ @see event_add(), event_del(), event_once()
+
+ */
+void event_set(struct event *, int, short, void (*)(int, short, void *), void *);
+
+/**
+ Schedule a one-time event to occur.
+
+ The function event_once() is similar to event_set(). However, it schedules
+ a callback to be called exactly once and does not require the caller to
+ prepare an event structure.
+
+ @param fd a file descriptor to monitor
+ @param events event(s) to monitor; can be any of EV_TIMEOUT | EV_READ |
+ EV_WRITE
+ @param callback callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_set()
+
+ */
+int event_once(int, short, void (*)(int, short, void *), void *,
+ const struct timeval *);
+
+
+/**
+ Schedule a one-time event (threadsafe variant)
+
+ The function event_base_once() is similar to event_set(). However, it
+ schedules a callback to be called exactly once and does not require the
+ caller to prepare an event structure.
+
+ @param base an event_base returned by event_init()
+ @param fd a file descriptor to monitor
+ @param events event(s) to monitor; can be any of EV_TIMEOUT | EV_READ |
+ EV_WRITE
+ @param callback callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_once()
+ */
+int event_base_once(struct event_base *base, int fd, short events,
+ void (*callback)(int, short, void *), void *arg,
+ const struct timeval *timeout);
+
+
+/**
+ Add an event to the set of monitored events.
+
+ The function event_add() schedules the execution of the ev event when the
+ event specified in event_set() occurs or in at least the time specified in
+ the tv. If tv is NULL, no timeout occurs and the function will only be
+ called if a matching event occurs on the file descriptor. The event in the
+ ev argument must be already initialized by event_set() and may not be used
+ in calls to event_set() until it has timed out or been removed with
+ event_del(). If the event in the ev argument already has a scheduled
+ timeout, the old timeout will be replaced by the new one.
+
+ @param ev an event struct initialized via event_set()
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_del(), event_set()
+ */
+int event_add(struct event *ev, const struct timeval *timeout);
+
+
+/**
+ Remove an event from the set of monitored events.
+
+ The function event_del() will cancel the event in the argument ev. If the
+ event has already executed or has never been added the call will have no
+ effect.
+
+ @param ev an event struct to be removed from the working set
+ @return 0 if successful, or -1 if an error occurred
+ @see event_add()
+ */
+int event_del(struct event *);
+
+void event_active(struct event *, int, short);
+
+
+/**
+ Checks if a specific event is pending or scheduled.
+
+ @param ev an event struct previously passed to event_add()
+ @param event the requested event type; any of EV_TIMEOUT|EV_READ|
+ EV_WRITE|EV_SIGNAL
+ @param tv an alternate timeout (FIXME - is this true?)
+
+ @return 1 if the event is pending, or 0 if the event has not occurred
+
+ */
+int event_pending(struct event *ev, short event, struct timeval *tv);
+
+
+/**
+ Test if an event structure has been initialized.
+
+ The event_initialized() macro can be used to check if an event has been
+ initialized.
+
+ @param ev an event structure to be tested
+ @return 1 if the structure has been initialized, or 0 if it has not been
+ initialized
+ */
+#ifdef WIN32
+#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT && (ev)->ev_fd != (int)INVALID_HANDLE_VALUE)
+#else
+#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+#endif
+
+
+/**
+ Get the libevent version number.
+
+ @return a string containing the version number of libevent
+ */
+const char *event_get_version(void);
+
+
+/**
+ Get the kernel event notification mechanism used by libevent.
+
+ @return a string identifying the kernel event mechanism (kqueue, epoll, etc.)
+ */
+const char *event_get_method(void);
+
+
+/**
+ Set the number of different event priorities.
+
+ By default libevent schedules all active events with the same priority.
+ However, sometimes it is desirable to process some events with a higher
+ priority than others. For that reason, libevent supports strict priority
+ queues. Active events with a lower priority are always processed before
+ events with a higher priority.
+
+ The number of different priorities can be set initially with the
+ event_priority_init() function. This function should be called before the
+ first call to event_dispatch(). The event_priority_set() function can be
+ used to assign a priority to an event. By default, libevent assigns the
+ middle priority to all events unless their priority is explicitly set.
+
+ @param npriorities the maximum number of priorities
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_priority_init(), event_priority_set()
+
+ */
+int event_priority_init(int);
+
+
+/**
+ Set the number of different event priorities (threadsafe variant).
+
+ See the description of event_priority_init() for more information.
+
+ @param eb the event_base structure returned by event_init()
+ @param npriorities the maximum number of priorities
+ @return 0 if successful, or -1 if an error occurred
+ @see event_priority_init(), event_priority_set()
+ */
+int event_base_priority_init(struct event_base *, int);
+
+
+/**
+ Assign a priority to an event.
+
+ @param ev an event struct
+ @param priority the new priority to be assigned
+ @return 0 if successful, or -1 if an error occurred
+ @see event_priority_init()
+ */
+int event_priority_set(struct event *, int);
+
+
+/* These functions deal with buffering input and output */
+
+/*
+ * A growable byte buffer.
+ *
+ * `buffer` points at the first valid byte (EVBUFFER_DATA) and `off` is
+ * the number of valid bytes (EVBUFFER_LENGTH).  `orig_buffer` is the
+ * start of the underlying allocation; `misalign` presumably counts bytes
+ * drained from the front (buffer - orig_buffer) and `totallen` the total
+ * allocated size -- confirm against buffer.c.  `cb`, if set via
+ * evbuffer_setcb(), is invoked whenever the buffer is modified.
+ */
+struct evbuffer {
+ u_char *buffer; /* first byte of valid data */
+ u_char *orig_buffer; /* start of the allocation */
+
+ size_t misalign; /* bytes skipped at the front -- TODO confirm */
+ size_t totallen; /* allocated capacity -- TODO confirm */
+ size_t off; /* number of valid bytes stored */
+
+ void (*cb)(struct evbuffer *, size_t, size_t, void *); /* modification callback; see evbuffer_setcb() */
+ void *cbarg; /* opaque argument for cb */
+};
+
+/* Just for error reporting - use other constants otherwise */
+#define EVBUFFER_READ 0x01
+#define EVBUFFER_WRITE 0x02
+#define EVBUFFER_EOF 0x10
+#define EVBUFFER_ERROR 0x20
+#define EVBUFFER_TIMEOUT 0x40
+
+struct bufferevent;
+typedef void (*evbuffercb)(struct bufferevent *, void *);
+typedef void (*everrorcb)(struct bufferevent *, short what, void *);
+
+/* Read/write watermarks for a bufferevent; see bufferevent_setwatermark(). */
+struct event_watermark {
+ size_t low; /* low watermark, in bytes */
+ size_t high; /* high watermark, in bytes */
+};
+
+/*
+ * A buffered event: wraps a file descriptor with automatically managed
+ * input and output evbuffers plus read/write/error callbacks.  Create
+ * with bufferevent_new(); see the function documentation below for the
+ * callback and watermark semantics.
+ */
+struct bufferevent {
+ struct event_base *ev_base; /* base set via bufferevent_base_set() */
+
+ struct event ev_read; /* underlying read event on the fd */
+ struct event ev_write; /* underlying write event on the fd */
+
+ struct evbuffer *input; /* data read from the fd (EVBUFFER_INPUT) */
+ struct evbuffer *output; /* data queued to write (EVBUFFER_OUTPUT) */
+
+ struct event_watermark wm_read; /* read watermarks; see bufferevent_setwatermark() */
+ struct event_watermark wm_write; /* write watermarks */
+
+ evbuffercb readcb; /* invoked when input data is available */
+ evbuffercb writecb; /* invoked when the output buffer drains below the low watermark */
+ everrorcb errorcb; /* invoked on error on the fd */
+ void *cbarg; /* opaque argument passed to all three callbacks */
+
+ int timeout_read; /* in seconds */
+ int timeout_write; /* in seconds */
+
+ short enabled; /* events that are currently enabled */
+};
+
+
+/**
+ Create a new bufferevent.
+
+ libevent provides an abstraction on top of the regular event callbacks.
+ This abstraction is called a buffered event. A buffered event provides
+ input and output buffers that get filled and drained automatically. The
+ user of a buffered event no longer deals directly with the I/O, but
+ instead is reading from input and writing to output buffers.
+
+ Once initialized, the bufferevent structure can be used repeatedly with
+ bufferevent_enable() and bufferevent_disable().
+
+ When read enabled the bufferevent will try to read from the file descriptor
+ and call the read callback. The write callback is executed whenever the
+ output buffer is drained below the write low watermark, which is 0 by
+ default.
+
+ If multiple bases are in use, bufferevent_base_set() must be called before
+ enabling the bufferevent for the first time.
+
+ @param fd the file descriptor from which data is read and written to.
+ This file descriptor is not allowed to be a pipe(2).
+ @param readcb callback to invoke when there is data to be read, or NULL if
+ no callback is desired
+ @param writecb callback to invoke when the file descriptor is ready for
+ writing, or NULL if no callback is desired
+ @param errorcb callback to invoke when there is an error on the file
+ descriptor
+ @param cbarg an argument that will be supplied to each of the callbacks
+ (readcb, writecb, and errorcb)
+ @return a pointer to a newly allocated bufferevent struct, or NULL if an
+ error occurred
+ @see bufferevent_base_set(), bufferevent_free()
+ */
+struct bufferevent *bufferevent_new(int fd,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg);
+
+
+/**
+ Assign a bufferevent to a specific event_base.
+
+ @param base an event_base returned by event_init()
+ @param bufev a bufferevent struct returned by bufferevent_new()
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_new()
+ */
+int bufferevent_base_set(struct event_base *base, struct bufferevent *bufev);
+
+
+/**
+ Assign a priority to a bufferevent.
+
+ @param bufev a bufferevent struct
+ @param pri the priority to be assigned
+ @return 0 if successful, or -1 if an error occurred
+ */
+int bufferevent_priority_set(struct bufferevent *bufev, int pri);
+
+
+/**
+ Deallocate the storage associated with a bufferevent structure.
+
+ @param bufev the bufferevent structure to be freed.
+ */
+void bufferevent_free(struct bufferevent *bufev);
+
+
+/**
+ Changes the callbacks for a bufferevent.
+
+ @param bufev the bufferevent object for which to change callbacks
+ @param readcb callback to invoke when there is data to be read, or NULL if
+ no callback is desired
+ @param writecb callback to invoke when the file descriptor is ready for
+ writing, or NULL if no callback is desired
+ @param errorcb callback to invoke when there is an error on the file
+ descriptor
+ @param cbarg an argument that will be supplied to each of the callbacks
+ (readcb, writecb, and errorcb)
+ @see bufferevent_new()
+ */
+void bufferevent_setcb(struct bufferevent *bufev,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg);
+
+/**
+ Changes the file descriptor on which the bufferevent operates.
+
+ @param bufev the bufferevent object for which to change the file descriptor
+ @param fd the file descriptor to operate on
+*/
+void bufferevent_setfd(struct bufferevent *bufev, int fd);
+
+/**
+ Write data to a bufferevent buffer.
+
+ The bufferevent_write() function can be used to write data to the file
+ descriptor. The data is appended to the output buffer and written to the
+ descriptor automatically as it becomes available for writing.
+
+ @param bufev the bufferevent to be written to
+ @param data a pointer to the data to be written
+ @param size the length of the data, in bytes
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_write_buffer()
+ */
+int bufferevent_write(struct bufferevent *bufev,
+ const void *data, size_t size);
+
+
+/**
+ Write data from an evbuffer to a bufferevent buffer. The evbuffer is
+ being drained as a result.
+
+ @param bufev the bufferevent to be written to
+ @param buf the evbuffer to be written
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_write()
+ */
+int bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf);
+
+
+/**
+ Read data from a bufferevent buffer.
+
+ The bufferevent_read() function is used to read data from the input buffer.
+
+ @param bufev the bufferevent to be read from
+ @param data pointer to a buffer that will store the data
+ @param size the size of the data buffer, in bytes
+ @return the amount of data read, in bytes.
+ */
+size_t bufferevent_read(struct bufferevent *bufev, void *data, size_t size);
+
+/**
+ Enable a bufferevent.
+
+ @param bufev the bufferevent to be enabled
+ @param event any combination of EV_READ | EV_WRITE.
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_disable()
+ */
+int bufferevent_enable(struct bufferevent *bufev, short event);
+
+
+/**
+ Disable a bufferevent.
+
+ @param bufev the bufferevent to be disabled
+ @param event any combination of EV_READ | EV_WRITE.
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_enable()
+ */
+int bufferevent_disable(struct bufferevent *bufev, short event);
+
+
+/**
+ Set the read and write timeout for a buffered event.
+
+ @param bufev the bufferevent to be modified
+ @param timeout_read the read timeout
+ @param timeout_write the write timeout
+ */
+void bufferevent_settimeout(struct bufferevent *bufev,
+ int timeout_read, int timeout_write);
+
+
+/**
+ Sets the watermarks for read and write events.
+
+ On input, a bufferevent does not invoke the user read callback unless
+ there is at least low watermark data in the buffer. If the read buffer
+ is beyond the high watermark, the bufferevent stops reading from the network.
+
+ On output, the user write callback is invoked whenever the buffered data
+ falls below the low watermark.
+
+ @param bufev the bufferevent to be modified
+ @param events EV_READ, EV_WRITE or both
+ @param lowmark the lower watermark to set
+ @param highmark the high watermark to set
+*/
+
+void bufferevent_setwatermark(struct bufferevent *bufev, short events,
+ size_t lowmark, size_t highmark);
+
+/* Accessors for evbuffer / bufferevent internals. */
+#define EVBUFFER_LENGTH(x) (x)->off /* number of bytes stored in the evbuffer */
+#define EVBUFFER_DATA(x) (x)->buffer /* pointer to the first stored byte */
+#define EVBUFFER_INPUT(x) (x)->input /* a bufferevent's input buffer */
+#define EVBUFFER_OUTPUT(x) (x)->output /* a bufferevent's output buffer */
+
+
+/**
+ Allocate storage for a new evbuffer.
+
+ @return a pointer to a newly allocated evbuffer struct, or NULL if an error
+ occurred
+ */
+struct evbuffer *evbuffer_new(void);
+
+
+/**
+ Deallocate storage for an evbuffer.
+
+ @param pointer to the evbuffer to be freed
+ */
+void evbuffer_free(struct evbuffer *);
+
+
+/**
+ Expands the available space in an event buffer.
+
+ Expands the available space in the event buffer to at least datlen
+
+ @param buf the event buffer to be expanded
+ @param datlen the new minimum length requirement
+ @return 0 if successful, or -1 if an error occurred
+*/
+int evbuffer_expand(struct evbuffer *, size_t);
+
+
+/**
+ Append data to the end of an evbuffer.
+
+ @param buf the event buffer to be appended to
+ @param data pointer to the beginning of the data buffer
+ @param datlen the number of bytes to be copied from the data buffer
+ */
+int evbuffer_add(struct evbuffer *, const void *, size_t);
+
+
+
+/**
+ Read data from an event buffer and drain the bytes read.
+
+ @param buf the event buffer to be read from
+ @param data the destination buffer to store the result
+ @param datlen the maximum size of the destination buffer
+ @return the number of bytes read
+ */
+int evbuffer_remove(struct evbuffer *, void *, size_t);
+
+
+/**
+ * Read a single line from an event buffer.
+ *
+ * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'.
+ * The returned buffer needs to be freed by the caller.
+ *
+ * @param buffer the evbuffer to read from
+ * @return pointer to a single line, or NULL if an error occurred
+ */
+char *evbuffer_readline(struct evbuffer *);
+
+
+/**
+ Move data from one evbuffer into another evbuffer.
+
+ This is a destructive add. The data from one buffer moves into
+ the other buffer. The destination buffer is expanded as needed.
+
+ @param outbuf the output buffer
+ @param inbuf the input buffer
+ @return 0 if successful, or -1 if an error occurred
+ */
+int evbuffer_add_buffer(struct evbuffer *, struct evbuffer *);
+
+
+/**
+ Append a formatted string to the end of an evbuffer.
+
+ @param buf the evbuffer that will be appended to
+ @param fmt a format string
+ @param ... arguments that will be passed to printf(3)
+ @return The number of bytes added if successful, or -1 if an error occurred.
+ */
+int evbuffer_add_printf(struct evbuffer *, const char *fmt, ...)
+#ifdef __GNUC__
+ __attribute__((format(printf, 2, 3)))
+#endif
+;
+
+
+/**
+ Append a va_list formatted string to the end of an evbuffer.
+
+ @param buf the evbuffer that will be appended to
+ @param fmt a format string
+ @param ap a varargs va_list argument array that will be passed to vprintf(3)
+ @return The number of bytes added if successful, or -1 if an error occurred.
+ */
+int evbuffer_add_vprintf(struct evbuffer *, const char *fmt, va_list ap);
+
+
+/**
+ Remove a specified number of bytes data from the beginning of an evbuffer.
+
+ @param buf the evbuffer to be drained
+ @param len the number of bytes to drain from the beginning of the buffer
+ */
+void evbuffer_drain(struct evbuffer *, size_t);
+
+
+/**
+ Write the contents of an evbuffer to a file descriptor.
+
+ The evbuffer will be drained after the bytes have been successfully written.
+
+ @param buffer the evbuffer to be written and drained
+ @param fd the file descriptor to be written to
+ @return the number of bytes written, or -1 if an error occurred
+ @see evbuffer_read()
+ */
+int evbuffer_write(struct evbuffer *, int);
+
+
+/**
+ Read from a file descriptor and store the result in an evbuffer.
+
+ @param buf the evbuffer to store the result
+ @param fd the file descriptor to read from
+ @param howmuch the number of bytes to be read
+ @return the number of bytes read, or -1 if an error occurred
+ @see evbuffer_write()
+ */
+int evbuffer_read(struct evbuffer *, int, int);
+
+
+/**
+ Find a string within an evbuffer.
+
+ @param buffer the evbuffer to be searched
+ @param what the string to be searched for
+ @param len the length of the search string
+ @return a pointer to the beginning of the search string, or NULL if the search failed.
+ */
+u_char *evbuffer_find(struct evbuffer *, const u_char *, size_t);
+
+/**
+ Set a callback to invoke when the evbuffer is modified.
+
+ @param buffer the evbuffer to be monitored
+ @param cb the callback function to invoke when the evbuffer is modified
+ @param cbarg an argument to be provided to the callback function
+ */
+void evbuffer_setcb(struct evbuffer *, void (*)(struct evbuffer *, size_t, size_t, void *), void *);
+
+/*
+ * Marshaling tagged data - We assume that all tags are inserted in their
+ * numeric order - so that unknown tags will always be higher than the
+ * known ones - and we can just ignore the end of an event buffer.
+ */
+
+void evtag_init(void);
+
+void evtag_marshal(struct evbuffer *evbuf, ev_uint32_t tag, const void *data,
+ ev_uint32_t len);
+
+/**
+ Encode an integer and store it in an evbuffer.
+
+ We encode integers by nibbles; the first nibble contains the number
+ of significant nibbles - 1; this allows us to encode up to 64-bit
+ integers. This function is byte-order independent.
+
+ @param evbuf evbuffer to store the encoded number
+ @param number a 32-bit integer
+ */
+void encode_int(struct evbuffer *evbuf, ev_uint32_t number);
+
+void evtag_marshal_int(struct evbuffer *evbuf, ev_uint32_t tag,
+ ev_uint32_t integer);
+
+void evtag_marshal_string(struct evbuffer *buf, ev_uint32_t tag,
+ const char *string);
+
+void evtag_marshal_timeval(struct evbuffer *evbuf, ev_uint32_t tag,
+ struct timeval *tv);
+
+int evtag_unmarshal(struct evbuffer *src, ev_uint32_t *ptag,
+ struct evbuffer *dst);
+int evtag_peek(struct evbuffer *evbuf, ev_uint32_t *ptag);
+int evtag_peek_length(struct evbuffer *evbuf, ev_uint32_t *plength);
+int evtag_payload_length(struct evbuffer *evbuf, ev_uint32_t *plength);
+int evtag_consume(struct evbuffer *evbuf);
+
+int evtag_unmarshal_int(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ ev_uint32_t *pinteger);
+
+int evtag_unmarshal_fixed(struct evbuffer *src, ev_uint32_t need_tag,
+ void *data, size_t len);
+
+int evtag_unmarshal_string(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ char **pstring);
+
+int evtag_unmarshal_timeval(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ struct timeval *ptv);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVENT_H_ */
diff --git a/libevent/event_rpcgen.py b/libevent/event_rpcgen.py
new file mode 100644
index 00000000000..5503ff8a5c3
--- /dev/null
+++ b/libevent/event_rpcgen.py
@@ -0,0 +1,1417 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2005 Niels Provos <provos@citi.umich.edu>
+# All rights reserved.
+#
+# Generates marshaling code based on libevent.
+
import sys
import re

# Script identity; kept for reference/version reporting.
_NAME = "event_rpcgen.py"
_VERSION = "0.1"
# Regex fragment matching a legal struct name in the input language.
_STRUCT_RE = '[a-z][a-z_0-9]*'

# Globals
# Current input line number, used in parser error messages.
line_count = 0

# Matches a run of LEADING whitespace only (anchored at ^); used by NormalizeLine.
white = re.compile(r'^\s+')
# Matches a C++-style // comment through end of line.
cppcomment = re.compile(r'\/\/.*$')
# Directives copied verbatim into the generated header (#define lines).
headerdirect = []
# Directives copied verbatim into the generated C file (#include / #if lines).
cppdirect = []
+
+# Holds everything that makes a struct
+class Struct:
+ def __init__(self, name):
+ self._name = name
+ self._entries = []
+ self._tags = {}
+ print >>sys.stderr, ' Created struct: %s' % name
+
+ def AddEntry(self, entry):
+ if self._tags.has_key(entry.Tag()):
+ print >>sys.stderr, ( 'Entry "%s" duplicates tag number '
+ '%d from "%s" around line %d' ) % (
+ entry.Name(), entry.Tag(),
+ self._tags[entry.Tag()], line_count)
+ sys.exit(1)
+ self._entries.append(entry)
+ self._tags[entry.Tag()] = entry.Name()
+ print >>sys.stderr, ' Added entry: %s' % entry.Name()
+
+ def Name(self):
+ return self._name
+
+ def EntryTagName(self, entry):
+ """Creates the name inside an enumeration for distinguishing data
+ types."""
+ name = "%s_%s" % (self._name, entry.Name())
+ return name.upper()
+
+ def PrintIdented(self, file, ident, code):
+ """Takes an array, add indentation to each entry and prints it."""
+ for entry in code:
+ print >>file, '%s%s' % (ident, entry)
+
+ def PrintTags(self, file):
+ """Prints the tag definitions for a structure."""
+ print >>file, '/* Tag definition for %s */' % self._name
+ print >>file, 'enum %s_ {' % self._name.lower()
+ for entry in self._entries:
+ print >>file, ' %s=%d,' % (self.EntryTagName(entry),
+ entry.Tag())
+ print >>file, ' %s_MAX_TAGS' % (self._name.upper())
+ print >>file, '};\n'
+
+ def PrintForwardDeclaration(self, file):
+ print >>file, 'struct %s;' % self._name
+
+ def PrintDeclaration(self, file):
+ print >>file, '/* Structure declaration for %s */' % self._name
+ print >>file, 'struct %s_access_ {' % self._name
+ for entry in self._entries:
+ dcl = entry.AssignDeclaration('(*%s_assign)' % entry.Name())
+ dcl.extend(
+ entry.GetDeclaration('(*%s_get)' % entry.Name()))
+ if entry.Array():
+ dcl.extend(
+ entry.AddDeclaration('(*%s_add)' % entry.Name()))
+ self.PrintIdented(file, ' ', dcl)
+ print >>file, '};\n'
+
+ print >>file, 'struct %s {' % self._name
+ print >>file, ' struct %s_access_ *base;\n' % self._name
+ for entry in self._entries:
+ dcl = entry.Declaration()
+ self.PrintIdented(file, ' ', dcl)
+ print >>file, ''
+ for entry in self._entries:
+ print >>file, ' uint8_t %s_set;' % entry.Name()
+ print >>file, '};\n'
+
+ print >>file, \
+"""struct %(name)s *%(name)s_new(void);
+void %(name)s_free(struct %(name)s *);
+void %(name)s_clear(struct %(name)s *);
+void %(name)s_marshal(struct evbuffer *, const struct %(name)s *);
+int %(name)s_unmarshal(struct %(name)s *, struct evbuffer *);
+int %(name)s_complete(struct %(name)s *);
+void evtag_marshal_%(name)s(struct evbuffer *, uint32_t,
+ const struct %(name)s *);
+int evtag_unmarshal_%(name)s(struct evbuffer *, uint32_t,
+ struct %(name)s *);""" % { 'name' : self._name }
+
+
+ # Write a setting function of every variable
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.AssignDeclaration(
+ entry.AssignFuncName()))
+ self.PrintIdented(file, '', entry.GetDeclaration(
+ entry.GetFuncName()))
+ if entry.Array():
+ self.PrintIdented(file, '', entry.AddDeclaration(
+ entry.AddFuncName()))
+
+ print >>file, '/* --- %s done --- */\n' % self._name
+
+ def PrintCode(self, file):
+ print >>file, ('/*\n'
+ ' * Implementation of %s\n'
+ ' */\n') % self._name
+
+ print >>file, \
+ 'static struct %(name)s_access_ __%(name)s_base = {' % \
+ { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeBase())
+ print >>file, '};\n'
+
+ # Creation
+ print >>file, (
+ 'struct %(name)s *\n'
+ '%(name)s_new(void)\n'
+ '{\n'
+ ' struct %(name)s *tmp;\n'
+ ' if ((tmp = malloc(sizeof(struct %(name)s))) == NULL) {\n'
+ ' event_warn("%%s: malloc", __func__);\n'
+ ' return (NULL);\n'
+ ' }\n'
+ ' tmp->base = &__%(name)s_base;\n') % { 'name' : self._name }
+
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeNew('tmp'))
+ print >>file, ' tmp->%s_set = 0;\n' % entry.Name()
+
+ print >>file, (
+ ' return (tmp);\n'
+ '}\n')
+
+ # Adding
+ for entry in self._entries:
+ if entry.Array():
+ self.PrintIdented(file, '', entry.CodeAdd())
+ print >>file, ''
+
+ # Assigning
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.CodeAssign())
+ print >>file, ''
+
+ # Getting
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.CodeGet())
+ print >>file, ''
+
+ # Clearing
+ print >>file, ( 'void\n'
+ '%(name)s_clear(struct %(name)s *tmp)\n'
+ '{'
+ ) % { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeClear('tmp'))
+
+ print >>file, '}\n'
+
+ # Freeing
+ print >>file, ( 'void\n'
+ '%(name)s_free(struct %(name)s *tmp)\n'
+ '{'
+ ) % { 'name' : self._name }
+
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeFree('tmp'))
+
+ print >>file, (' free(tmp);\n'
+ '}\n')
+
+ # Marshaling
+ print >>file, ('void\n'
+ '%(name)s_marshal(struct evbuffer *evbuf, '
+ 'const struct %(name)s *tmp)'
+ '{') % { 'name' : self._name }
+ for entry in self._entries:
+ indent = ' '
+ # Optional entries do not have to be set
+ if entry.Optional():
+ indent += ' '
+ print >>file, ' if (tmp->%s_set) {' % entry.Name()
+ self.PrintIdented(
+ file, indent,
+ entry.CodeMarshal('evbuf', self.EntryTagName(entry), 'tmp'))
+ if entry.Optional():
+ print >>file, ' }'
+
+ print >>file, '}\n'
+
+ # Unmarshaling
+ print >>file, ('int\n'
+ '%(name)s_unmarshal(struct %(name)s *tmp, '
+ ' struct evbuffer *evbuf)\n'
+ '{\n'
+ ' uint32_t tag;\n'
+ ' while (EVBUFFER_LENGTH(evbuf) > 0) {\n'
+ ' if (evtag_peek(evbuf, &tag) == -1)\n'
+ ' return (-1);\n'
+ ' switch (tag) {\n'
+ ) % { 'name' : self._name }
+ for entry in self._entries:
+ print >>file, ' case %s:\n' % self.EntryTagName(entry)
+ if not entry.Array():
+ print >>file, (
+ ' if (tmp->%s_set)\n'
+ ' return (-1);'
+ ) % (entry.Name())
+
+ self.PrintIdented(
+ file, ' ',
+ entry.CodeUnmarshal('evbuf',
+ self.EntryTagName(entry), 'tmp'))
+
+ print >>file, ( ' tmp->%s_set = 1;\n' % entry.Name() +
+ ' break;\n' )
+ print >>file, ( ' default:\n'
+ ' return -1;\n'
+ ' }\n'
+ ' }\n' )
+ # Check if it was decoded completely
+ print >>file, ( ' if (%(name)s_complete(tmp) == -1)\n'
+ ' return (-1);'
+ ) % { 'name' : self._name }
+
+ # Successfully decoded
+ print >>file, ( ' return (0);\n'
+ '}\n')
+
+ # Checking if a structure has all the required data
+ print >>file, (
+ 'int\n'
+ '%(name)s_complete(struct %(name)s *msg)\n'
+ '{' ) % { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(
+ file, ' ',
+ entry.CodeComplete('msg'))
+ print >>file, (
+ ' return (0);\n'
+ '}\n' )
+
+ # Complete message unmarshaling
+ print >>file, (
+ 'int\n'
+ 'evtag_unmarshal_%(name)s(struct evbuffer *evbuf, '
+ 'uint32_t need_tag, struct %(name)s *msg)\n'
+ '{\n'
+ ' uint32_t tag;\n'
+ ' int res = -1;\n'
+ '\n'
+ ' struct evbuffer *tmp = evbuffer_new();\n'
+ '\n'
+ ' if (evtag_unmarshal(evbuf, &tag, tmp) == -1'
+ ' || tag != need_tag)\n'
+ ' goto error;\n'
+ '\n'
+ ' if (%(name)s_unmarshal(msg, tmp) == -1)\n'
+ ' goto error;\n'
+ '\n'
+ ' res = 0;\n'
+ '\n'
+ ' error:\n'
+ ' evbuffer_free(tmp);\n'
+ ' return (res);\n'
+ '}\n' ) % { 'name' : self._name }
+
+ # Complete message marshaling
+ print >>file, (
+ 'void\n'
+ 'evtag_marshal_%(name)s(struct evbuffer *evbuf, uint32_t tag, '
+ 'const struct %(name)s *msg)\n'
+ '{\n'
+ ' struct evbuffer *_buf = evbuffer_new();\n'
+ ' assert(_buf != NULL);\n'
+ ' evbuffer_drain(_buf, -1);\n'
+ ' %(name)s_marshal(_buf, msg);\n'
+ ' evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), '
+ 'EVBUFFER_LENGTH(_buf));\n'
+ ' evbuffer_free(_buf);\n'
+ '}\n' ) % { 'name' : self._name }
+
class Entry:
    """Base class for one struct member.

    Subclasses specialize per member type (int, string, bytes, struct,
    array) and generate the C code fragments for declaring, assigning,
    getting, clearing, freeing and (un)marshaling that member.
    """

    def __init__(self, type, name, tag):
        self._type = type
        self._name = name
        self._tag = int(tag)
        self._ctype = type        # C type of the member; subclasses override
        self._optional = 0
        self._can_be_array = 0    # only subclasses that allow arrays set this
        self._array = 0
        self._line_count = -1     # input line, set via SetLineCount()
        self._struct = None       # owning Struct, set via SetStruct()
        self._refname = None      # referenced struct name (struct members only)

    def GetTranslation(self):
        # Substitution dict used by the %-templates in the code generators.
        return { "parent_name" : self._struct.Name(),
                 "name" : self._name,
                 "ctype" : self._ctype,
                 "refname" : self._refname
                 }

    def SetStruct(self, struct):
        self._struct = struct

    def LineCount(self):
        # Must only be queried after SetLineCount() has been called.
        assert self._line_count != -1
        return self._line_count

    def SetLineCount(self, number):
        self._line_count = number

    def Array(self):
        return self._array

    def Optional(self):
        return self._optional

    def Tag(self):
        return self._tag

    def Name(self):
        return self._name

    def Type(self):
        return self._type

    def MakeArray(self, yes=1):
        self._array = yes

    def MakeOptional(self):
        self._optional = 1

    def GetFuncName(self):
        return '%s_%s_get' % (self._struct.Name(), self._name)

    def GetDeclaration(self, funcname):
        """Prototype of the generated getter."""
        code = [ 'int %s(struct %s *, %s *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeGet(self):
        """C body of the getter: fails unless the member has been set."""
        code = (
            'int',
            '%(parent_name)s_%(name)s_get(struct %(parent_name)s *msg, '
            '%(ctype)s *value)',
            '{',
            ' if (msg->%(name)s_set != 1)',
            ' return (-1);',
            ' *value = msg->%(name)s_data;',
            ' return (0);',
            '}' )
        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def AssignFuncName(self):
        return '%s_%s_assign' % (self._struct.Name(), self._name)

    def AddFuncName(self):
        return '%s_%s_add' % (self._struct.Name(), self._name)

    def AssignDeclaration(self, funcname):
        """Prototype of the generated setter."""
        code = [ 'int %s(struct %s *, const %s);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeAssign(self):
        """C body of the setter: stores the value and marks the member set."""
        code = [ 'int',
                 '%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,'
                 ' const %(ctype)s value)',
                 '{',
                 ' msg->%(name)s_set = 1;',
                 ' msg->%(name)s_data = value;',
                 ' return (0);',
                 '}' ]
        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def CodeClear(self, structname):
        # Default clear: only reset the _set flag; subclasses free storage.
        code = [ '%s->%s_set = 0;' % (structname, self.Name()) ]

        return code

    def CodeComplete(self, structname):
        # Optional members never block completeness.
        if self.Optional():
            return []

        code = [ 'if (!%s->%s_set)' % (structname, self.Name()),
                 ' return (-1);' ]

        return code

    def CodeFree(self, name):
        # Default: nothing to free; subclasses with heap storage override.
        return []

    def CodeBase(self):
        """Initializer lines for this member in the accessor-table struct."""
        code = [
            '%(parent_name)s_%(name)s_assign,',
            '%(parent_name)s_%(name)s_get,'
            ]
        if self.Array():
            code.append('%(parent_name)s_%(name)s_add,')

        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def Verify(self):
        """Sanity-checks the parsed entry; exits the program on violation."""
        if self.Array() and not self._can_be_array:
            print >>sys.stderr, (
                'Entry "%s" cannot be created as an array '
                'around line %d' ) % (self._name, self.LineCount())
            sys.exit(1)
        if not self._struct:
            print >>sys.stderr, (
                'Entry "%s" does not know which struct it belongs to '
                'around line %d' ) % (self._name, self.LineCount())
            sys.exit(1)
        if self._optional and self._array:
            print >>sys.stderr, ( 'Entry "%s" has illegal combination of '
                                  'optional and array around line %d' ) % (
                self._name, self.LineCount() )
            sys.exit(1)
+
class EntryBytes(Entry):
    """Fixed-length byte-array member (declared with an explicit length)."""

    def __init__(self, type, name, tag, length):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._length = length
        self._ctype = 'uint8_t'

    def GetDeclaration(self, funcname):
        # Getter exposes a pointer into the in-struct array: uint8_t **.
        code = [ 'int %s(struct %s *, %s **);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, const %s *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def Declaration(self):
        dcl = ['uint8_t %s_data[%s];' % (self._name, self._length)]

        return dcl

    def CodeGet(self):
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s **value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1)' % name,
                 ' return (-1);',
                 ' *value = msg->%s_data;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeAssign(self):
        # Copies exactly _length bytes into the fixed array.
        name = self._name
        code = [ 'int',
                 '%s_%s_assign(struct %s *msg, const %s *value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' msg->%s_set = 1;' % name,
                 ' memcpy(msg->%s_data, value, %s);' % (
                     name, self._length),
                 ' return (0);',
                 '}' ]
        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = [ 'if (evtag_unmarshal_fixed(%s, %s, ' % (buf, tag_name) +
                 '%s->%s_data, ' % (var_name, self._name) +
                 'sizeof(%s->%s_data)) == -1) {' % (
                     var_name, self._name),
                 ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                     self._name ),
                 ' return (-1);',
                 '}'
                 ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal(%s, %s, %s->%s_data, sizeof(%s->%s_data));' % (
            buf, tag_name, var_name, self._name, var_name, self._name )]
        return code

    def CodeClear(self, structname):
        # Also zero the byte array, not just the _set flag.
        code = [ '%s->%s_set = 0;' % (structname, self.Name()),
                 'memset(%s->%s_data, 0, sizeof(%s->%s_data));' % (
                     structname, self._name, structname, self._name)]

        return code

    def CodeNew(self, name):
        code = ['memset(%s->%s_data, 0, sizeof(%s->%s_data));' % (
            name, self._name, name, self._name)]
        return code

    def Verify(self):
        # Fixed-length byte entries must carry a length in the input.
        if not self._length:
            print >>sys.stderr, 'Entry "%s" needs a length around line %d' % (
                self._name, self.LineCount() )
            sys.exit(1)

        Entry.Verify(self)
+
class EntryInt(Entry):
    """32-bit unsigned integer member."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'uint32_t'

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_unmarshal_int(%s, %s, &%s->%s_data) == -1) {' % (
            buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}' ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_int(%s, %s, %s->%s_data);' % (
            buf, tag_name, var_name, self._name)]
        return code

    def Declaration(self):
        dcl = ['uint32_t %s_data;' % self._name]

        return dcl

    def CodeNew(self, name):
        code = ['%s->%s_data = 0;' % (name, self._name)]
        return code
+
class EntryString(Entry):
    """NUL-terminated string member; storage is heap-allocated via strdup."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'char *'

    def CodeAssign(self):
        # Frees any previous value before duplicating the new one.
        name = self._name
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,
 const %(ctype)s value)
{
 if (msg->%(name)s_data != NULL)
 free(msg->%(name)s_data);
 if ((msg->%(name)s_data = strdup(value)) == NULL)
 return (-1);
 msg->%(name)s_set = 1;
 return (0);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_unmarshal_string(%s, %s, &%s->%s_data) == -1) {' % (
            buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_string(%s, %s, %s->%s_data);' % (
            buf, tag_name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        # Releases the string and resets pointer + flag.
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' free (%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' free (%s->%s_data); ' % (name, self._name)]

        return code

    def Declaration(self):
        dcl = ['char *%s_data;' % self._name]

        return dcl
+
class EntryStruct(Entry):
    """Member that embeds another generated struct (by reference).

    The only entry type that may also be an array (_can_be_array).
    """

    def __init__(self, type, name, tag, refname):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._can_be_array = 1
        self._refname = refname
        self._ctype = 'struct %s*' % refname

    def CodeGet(self):
        # Lazily allocates the sub-struct on first access.
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s *value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1) {' % name,
                 ' msg->%s_data = %s_new();' % (name, self._refname),
                 ' if (msg->%s_data == NULL)' % name,
                 ' return (-1);',
                 ' msg->%s_set = 1;' % name,
                 ' }',
                 ' *value = msg->%s_data;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeAssign(self):
        # Deep-copies 'value' by marshaling it into a temporary evbuffer
        # and unmarshaling into our own sub-struct.
        name = self._name
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,
 const %(ctype)s value)
{
 struct evbuffer *tmp = NULL;
 if (msg->%(name)s_set) {
 %(refname)s_clear(msg->%(name)s_data);
 msg->%(name)s_set = 0;
 } else {
 msg->%(name)s_data = %(refname)s_new();
 if (msg->%(name)s_data == NULL) {
 event_warn("%%s: %(refname)s_new()", __func__);
 goto error;
 }
 }
 if ((tmp = evbuffer_new()) == NULL) {
 event_warn("%%s: evbuffer_new()", __func__);
 goto error;
 }
 %(refname)s_marshal(tmp, value);
 if (%(refname)s_unmarshal(msg->%(name)s_data, tmp) == -1) {
 event_warnx("%%s: %(refname)s_unmarshal", __func__);
 goto error;
 }
 msg->%(name)s_set = 1;
 evbuffer_free(tmp);
 return (0);
 error:
 if (tmp != NULL)
 evbuffer_free(tmp);
 if (msg->%(name)s_data != NULL) {
 %(refname)s_free(msg->%(name)s_data);
 msg->%(name)s_data = NULL;
 }
 return (-1);
}""" % self.GetTranslation()
        return code.split('\n')

    def CodeComplete(self, structname):
        # Recursively checks completeness of the sub-struct.
        if self.Optional():
            code = [ 'if (%s->%s_set && %s_complete(%s->%s_data) == -1)' % (
                structname, self.Name(),
                self._refname, structname, self.Name()),
                ' return (-1);' ]
        else:
            code = [ 'if (%s_complete(%s->%s_data) == -1)' % (
                self._refname, structname, self.Name()),
                ' return (-1);' ]

        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['%s->%s_data = %s_new();' % (
            var_name, self._name, self._refname),
            'if (%s->%s_data == NULL)' % (var_name, self._name),
            ' return (-1);',
            'if (evtag_unmarshal_%s(%s, %s, %s->%s_data) == -1) {' % (
                self._refname, buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_%s(%s, %s, %s->%s_data);' % (
            self._refname, buf, tag_name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' %s_free(%s->%s_data);' % (
                     self._refname, structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' %s_free(%s->%s_data); ' % (
                    self._refname, name, self._name)]

        return code

    def Declaration(self):
        dcl = ['%s %s_data;' % (self._ctype, self._name)]

        return dcl
+
class EntryVarBytes(Entry):
    """Variable-length byte buffer member; tracks a separate _length field."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'uint8_t *'

    def GetDeclaration(self, funcname):
        # Getter also returns the current length through an out parameter.
        code = [ 'int %s(struct %s *, %s *, uint32_t *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, const %s, uint32_t);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeAssign(self):
        # Replaces any previous buffer with a fresh heap copy of 'value'.
        name = self._name
        code = [ 'int',
                 '%s_%s_assign(struct %s *msg, '
                 'const %s value, uint32_t len)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_data != NULL)' % name,
                 ' free (msg->%s_data);' % name,
                 ' msg->%s_data = malloc(len);' % name,
                 ' if (msg->%s_data == NULL)' % name,
                 ' return (-1);',
                 ' msg->%s_set = 1;' % name,
                 ' msg->%s_length = len;' % name,
                 ' memcpy(msg->%s_data, value, len);' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeGet(self):
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s *value, uint32_t *plen)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1)' % name,
                 ' return (-1);',
                 ' *value = msg->%s_data;' % name,
                 ' *plen = msg->%s_length;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_payload_length(%s, &%s->%s_length) == -1)' % (
            buf, var_name, self._name),
            ' return (-1);',
            # We do not want DoS opportunities
            'if (%s->%s_length > EVBUFFER_LENGTH(%s))' % (
                var_name, self._name, buf),
            ' return (-1);',
            'if ((%s->%s_data = malloc(%s->%s_length)) == NULL)' % (
                var_name, self._name, var_name, self._name),
            ' return (-1);',
            'if (evtag_unmarshal_fixed(%s, %s, %s->%s_data, '
            '%s->%s_length) == -1) {' % (
                buf, tag_name, var_name, self._name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal(%s, %s, %s->%s_data, %s->%s_length);' % (
            buf, tag_name, var_name, self._name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' free (%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_length = 0;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name),
                '%s->%s_length = 0;' % (name, self._name) ]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' free (%s->%s_data); ' % (name, self._name)]

        return code

    def Declaration(self):
        dcl = ['uint8_t *%s_data;' % self._name,
               'uint32_t %s_length;' % self._name]

        return dcl
+
class EntryArray(Entry):
    """Wrapper turning another (struct-typed) entry into a growable array.

    Storage is a heap array of pointers plus _length and _num_allocated
    counters; capacity doubles on growth.
    """

    def __init__(self, entry):
        # Init base class
        Entry.__init__(self, entry._type, entry._name, entry._tag)

        self._entry = entry          # the wrapped per-element entry
        self._refname = entry._refname
        self._ctype = 'struct %s *' % self._refname

    def GetDeclaration(self, funcname):
        """Allows direct access to elements of the array."""
        translate = self.GetTranslation()
        translate["funcname"] = funcname
        code = [
            'int %(funcname)s(struct %(parent_name)s *, int, %(ctype)s *);' %
            translate ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, int, const %s);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AddDeclaration(self, funcname):
        # _add returns the freshly appended element (or NULL).
        code = [ '%s %s(struct %s *);' % (
            self._ctype, funcname, self._struct.Name() ) ]
        return code

    def CodeGet(self):
        # Bounds-checked element read.
        code = """int
%(parent_name)s_%(name)s_get(struct %(parent_name)s *msg, int offset,
 %(ctype)s *value)
{
 if (!msg->%(name)s_set || offset < 0 || offset >= msg->%(name)s_length)
 return (-1);
 *value = msg->%(name)s_data[offset];
 return (0);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeAssign(self):
        # Deep-copies 'value' into an existing slot via marshal/unmarshal.
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg, int off,
 const %(ctype)s value)
{
 struct evbuffer *tmp = NULL;
 if (!msg->%(name)s_set || off < 0 || off >= msg->%(name)s_length)
 return (-1);
 %(refname)s_clear(msg->%(name)s_data[off]);
 if ((tmp = evbuffer_new()) == NULL) {
 event_warn("%%s: evbuffer_new()", __func__);
 goto error;
 }
 %(refname)s_marshal(tmp, value);
 if (%(refname)s_unmarshal(msg->%(name)s_data[off], tmp) == -1) {
 event_warnx("%%s: %(refname)s_unmarshal", __func__);
 goto error;
 }
 evbuffer_free(tmp);
 return (0);
error:
 if (tmp != NULL)
 evbuffer_free(tmp);
 %(refname)s_clear(msg->%(name)s_data[off]);
 return (-1);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeAdd(self):
        # Appends a new element, doubling capacity when needed; on failure
        # the length increment is rolled back.
        code = \
"""%(ctype)s
%(parent_name)s_%(name)s_add(struct %(parent_name)s *msg)
{
 if (++msg->%(name)s_length >= msg->%(name)s_num_allocated) {
 int tobe_allocated = msg->%(name)s_num_allocated;
 %(ctype)s* new_data = NULL;
 tobe_allocated = !tobe_allocated ? 1 : tobe_allocated << 1;
 new_data = (%(ctype)s*) realloc(msg->%(name)s_data,
 tobe_allocated * sizeof(%(ctype)s));
 if (new_data == NULL)
 goto error;
 msg->%(name)s_data = new_data;
 msg->%(name)s_num_allocated = tobe_allocated;
 }
 msg->%(name)s_data[msg->%(name)s_length - 1] = %(refname)s_new();
 if (msg->%(name)s_data[msg->%(name)s_length - 1] == NULL)
 goto error;
 msg->%(name)s_set = 1;
 return (msg->%(name)s_data[msg->%(name)s_length - 1]);
error:
 --msg->%(name)s_length;
 return (NULL);
}
 """ % self.GetTranslation()

        return code.split('\n')

    def CodeComplete(self, structname):
        # Every element must itself be complete.
        code = []
        translate = self.GetTranslation()

        if self.Optional():
            code.append( 'if (%(structname)s->%(name)s_set)' % translate)

        translate["structname"] = structname
        tmp = """{
 int i;
 for (i = 0; i < %(structname)s->%(name)s_length; ++i) {
 if (%(refname)s_complete(%(structname)s->%(name)s_data[i]) == -1)
 return (-1);
 }
}""" % translate
        code.extend(tmp.split('\n'))

        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        # Appends a slot via _add, then unmarshals into it; rolls back the
        # length on failure.
        translate = self.GetTranslation()
        translate["var_name"] = var_name
        translate["buf"] = buf
        translate["tag_name"] = tag_name
        code = """if (%(parent_name)s_%(name)s_add(%(var_name)s) == NULL)
 return (-1);
if (evtag_unmarshal_%(refname)s(%(buf)s, %(tag_name)s,
 %(var_name)s->%(name)s_data[%(var_name)s->%(name)s_length - 1]) == -1) {
 --%(var_name)s->%(name)s_length;
 event_warnx("%%s: failed to unmarshal %(name)s", __func__);
 return (-1);
}""" % translate

        return code.split('\n')

    def CodeMarshal(self, buf, tag_name, var_name):
        # Each element is marshaled with the same tag, in order.
        code = ['{',
                ' int i;',
                ' for (i = 0; i < %s->%s_length; ++i) {' % (
                    var_name, self._name),
                ' evtag_marshal_%s(%s, %s, %s->%s_data[i]);' % (
                    self._refname, buf, tag_name, var_name, self._name),
                ' }',
                '}'
                ]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' int i;',
                 ' for (i = 0; i < %s->%s_length; ++i) {' % (
                     structname, self.Name()),
                 ' %s_free(%s->%s_data[i]);' % (
                     self._refname, structname, self.Name()),
                 ' }',
                 ' free(%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 ' %s->%s_length = 0;' % (structname, self.Name()),
                 ' %s->%s_num_allocated = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name),
                '%s->%s_length = 0;' % (name, self._name),
                '%s->%s_num_allocated = 0;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL) {' % (name, self._name),
                ' int i;',
                ' for (i = 0; i < %s->%s_length; ++i) {' % (
                    name, self._name),
                ' %s_free(%s->%s_data[i]); ' % (
                    self._refname, name, self._name),
                ' %s->%s_data[i] = NULL;' % (name, self._name),
                ' }',
                ' free(%s->%s_data);' % (name, self._name),
                ' %s->%s_data = NULL;' % (name, self._name),
                ' %s->%s_length = 0;' % (name, self._name),
                ' %s->%s_num_allocated = 0;' % (name, self._name),
                '}'
                ]

        return code

    def Declaration(self):
        dcl = ['struct %s **%s_data;' % (self._refname, self._name),
               'int %s_length;' % self._name,
               'int %s_num_allocated;' % self._name ]

        return dcl
+
def NormalizeLine(line):
    """Normalizes one input line for parsing.

    Removes a C++-style // comment, trims surrounding whitespace, and
    squeezes a run of leading whitespace to a single space.  Uses the same
    patterns as the module-level 'cppcomment' and 'white' regexes, applied
    inline.  Note the leading-whitespace pattern is anchored at ^, so after
    strip() it is effectively a no-op — interior whitespace is preserved,
    matching the original behavior.
    """
    without_comment = re.sub(r'\/\/.*$', '', line)
    trimmed = without_comment.strip()
    return re.sub(r'^\s+', ' ', trimmed)
+
def ProcessOneEntry(newstruct, entry):
    """Parses one member declaration ('[optional] [array] type name[len] = tag')
    and adds the resulting Entry to 'newstruct'.

    Exits the program on any syntax error.  Returns a list of additional
    structs created while processing.
    NOTE(review): the returned list is always empty — 'structs' is created
    below and never populated; callers still extend() with it.
    """
    optional = 0
    array = 0
    entry_type = ''
    name = ''
    tag = ''
    tag_set = None
    separator = ''
    fixed_length = ''

    # Consume tokens left to right; each clause claims the next field.
    tokens = entry.split(' ')
    while tokens:
        token = tokens[0]
        tokens = tokens[1:]

        if not entry_type:
            # Modifiers may precede the type keyword.
            if not optional and token == 'optional':
                optional = 1
                continue

            if not array and token == 'array':
                array = 1
                continue

        if not entry_type:
            entry_type = token
            continue

        if not name:
            # Name with an optional fixed length suffix, e.g. 'digest[16]'.
            res = re.match(r'^([^\[\]]+)(\[.*\])?$', token)
            if not res:
                print >>sys.stderr, 'Cannot parse name: \"%s\" around %d' % (
                    entry, line_count)
                sys.exit(1)
            name = res.group(1)
            fixed_length = res.group(2)
            if fixed_length:
                fixed_length = fixed_length[1:-1]
            continue

        if not separator:
            separator = token
            if separator != '=':
                print >>sys.stderr, 'Expected "=" after name \"%s\" got %s' % (
                    name, token)
                sys.exit(1)
            continue

        if not tag_set:
            tag_set = 1
            # Tag may be decimal or 0x-prefixed hex (base 0 auto-detects).
            if not re.match(r'^(0x)?[0-9]+$', token):
                print >>sys.stderr, 'Expected tag number: \"%s\"' % entry
                sys.exit(1)
            tag = int(token, 0)
            continue

        print >>sys.stderr, 'Cannot parse \"%s\"' % entry
        sys.exit(1)

    if not tag_set:
        print >>sys.stderr, 'Need tag number: \"%s\"' % entry
        sys.exit(1)

    # Create the right entry
    if entry_type == 'bytes':
        # 'bytes name[N]' is fixed length; bare 'bytes name' is variable.
        if fixed_length:
            newentry = EntryBytes(entry_type, name, tag, fixed_length)
        else:
            newentry = EntryVarBytes(entry_type, name, tag)
    elif entry_type == 'int' and not fixed_length:
        newentry = EntryInt(entry_type, name, tag)
    elif entry_type == 'string' and not fixed_length:
        newentry = EntryString(entry_type, name, tag)
    else:
        res = re.match(r'^struct\[(%s)\]$' % _STRUCT_RE,
                       entry_type, re.IGNORECASE)
        if res:
            # References another struct defined in our file
            newentry = EntryStruct(entry_type, name, tag, res.group(1))
        else:
            print >>sys.stderr, 'Bad type: "%s" in "%s"' % (entry_type, entry)
            sys.exit(1)

    structs = []

    if optional:
        newentry.MakeOptional()
    if array:
        newentry.MakeArray()

    newentry.SetStruct(newstruct)
    newentry.SetLineCount(line_count)
    newentry.Verify()

    if array:
        # We need to encapsulate this entry into a struct
        # NOTE(review): 'newname' is computed but never used — dead code.
        newname = newentry.Name()+ '_array'

        # Now borgify the new entry.
        newentry = EntryArray(newentry)
        newentry.SetStruct(newstruct)
        newentry.SetLineCount(line_count)
        newentry.MakeArray()

    newstruct.AddEntry(newentry)

    return structs
+
def ProcessStruct(data):
    """Parses one normalized, single-line struct body into Struct objects.

    'data' has the shape 'struct <name> { <entries> }' with entries
    separated by ';'.  Returns the list of created structs (the new struct
    last).
    """
    tokens = data.split(' ')

    # First three tokens are: 'struct' 'name' '{'
    newstruct = Struct(tokens[1])

    # Everything between the braces, re-joined, then split per member.
    inside = ' '.join(tokens[3:-1])

    tokens = inside.split(';')

    structs = []

    for entry in tokens:
        entry = NormalizeLine(entry)
        if not entry:
            continue

        # It's possible that new structs get defined in here
        structs.extend(ProcessOneEntry(newstruct, entry))

    structs.append(newstruct)
    return structs
+
def GetNextStruct(file):
    """Reads lines until one complete struct body has been collected.

    Strips C and C++ comments, routes preprocessor-style lines outside of
    structs into the 'cppdirect'/'headerdirect' pass-through lists, and
    returns the struct as a single normalized string ('' at end of input).
    Exits the program on malformed input.
    """
    global line_count
    global cppdirect

    got_struct = 0

    # NOTE(review): 'processed_lines' is never used after this — dead code.
    processed_lines = []

    have_c_comment = 0
    data = ''
    while 1:
        line = file.readline()
        if not line:
            break

        line_count += 1
        line = line[:-1]          # drop the trailing newline

        # Strip C-style /* ... */ comments, possibly spanning lines.
        if not have_c_comment and re.search(r'/\*', line):
            if re.search(r'/\*.*\*/', line):
                line = re.sub(r'/\*.*\*/', '', line)
            else:
                line = re.sub(r'/\*.*$', '', line)
                have_c_comment = 1

        if have_c_comment:
            if not re.search(r'\*/', line):
                continue
            have_c_comment = 0
            line = re.sub(r'^.*\*/', '', line)

        line = NormalizeLine(line)

        if not line:
            continue

        if not got_struct:
            # Outside a struct: collect pass-through directives or expect
            # the 'struct name {' opener.
            if re.match(r'#include ["<].*[>"]', line):
                cppdirect.append(line)
                continue

            if re.match(r'^#(if( |def)|endif)', line):
                cppdirect.append(line)
                continue

            if re.match(r'^#define', line):
                headerdirect.append(line)
                continue

            if not re.match(r'^struct %s {$' % _STRUCT_RE,
                            line, re.IGNORECASE):
                print >>sys.stderr, 'Missing struct on line %d: %s' % (
                    line_count, line)
                sys.exit(1)
            else:
                got_struct = 1
                data += line
            continue

        # We are inside the struct
        tokens = line.split('}')
        if len(tokens) == 1:
            data += ' ' + line
            continue

        if len(tokens[1]):
            print >>sys.stderr, 'Trailing garbage after struct on line %d' % (
                line_count )
            sys.exit(1)

        # We found the end of the struct
        data += ' %s}' % tokens[0]
        break

    # Remove any comments, that might be in there
    data = re.sub(r'/\*.*\*/', '', data)

    return data
+
+
+def Parse(file):
+ """
+ Parses the input file and returns C code and corresponding header file.
+ """
+
+ entities = []
+
+ while 1:
+ # Just gets the whole struct nicely formatted
+ data = GetNextStruct(file)
+
+ if not data:
+ break
+
+ entities.extend(ProcessStruct(data))
+
+ return entities
+
+def GuardName(name):
+ name = '_'.join(name.split('.'))
+ name = '_'.join(name.split('/'))
+ guard = '_'+name.upper()+'_'
+
+ return guard
+
+def HeaderPreamble(name):
+ guard = GuardName(name)
+ pre = (
+ '/*\n'
+ ' * Automatically generated from %s\n'
+ ' */\n\n'
+ '#ifndef %s\n'
+ '#define %s\n\n' ) % (
+ name, guard, guard)
+
+ # insert stdint.h - let's hope everyone has it
+ pre += (
+ '#include <event-config.h>\n'
+ '#ifdef _EVENT_HAVE_STDINT_H\n'
+ '#include <stdint.h>\n'
+ '#endif\n' )
+
+ for statement in headerdirect:
+ pre += '%s\n' % statement
+ if headerdirect:
+ pre += '\n'
+
+ pre += (
+ '#define EVTAG_HAS(msg, member) ((msg)->member##_set == 1)\n'
+ '#ifdef __GNUC__\n'
+ '#define EVTAG_ASSIGN(msg, member, args...) '
+ '(*(msg)->base->member##_assign)(msg, ## args)\n'
+ '#define EVTAG_GET(msg, member, args...) '
+ '(*(msg)->base->member##_get)(msg, ## args)\n'
+ '#else\n'
+ '#define EVTAG_ASSIGN(msg, member, ...) '
+ '(*(msg)->base->member##_assign)(msg, ## __VA_ARGS__)\n'
+ '#define EVTAG_GET(msg, member, ...) '
+ '(*(msg)->base->member##_get)(msg, ## __VA_ARGS__)\n'
+ '#endif\n'
+ '#define EVTAG_ADD(msg, member) (*(msg)->base->member##_add)(msg)\n'
+ '#define EVTAG_LEN(msg, member) ((msg)->member##_length)\n'
+ )
+
+ return pre
+
+
+def HeaderPostamble(name):
+ guard = GuardName(name)
+ return '#endif /* %s */' % guard
+
+def BodyPreamble(name):
+ global _NAME
+ global _VERSION
+
+ header_file = '.'.join(name.split('.')[:-1]) + '.gen.h'
+
+ pre = ( '/*\n'
+ ' * Automatically generated from %s\n'
+ ' * by %s/%s. DO NOT EDIT THIS FILE.\n'
+ ' */\n\n' ) % (name, _NAME, _VERSION)
+ pre += ( '#include <sys/types.h>\n'
+ '#include <sys/time.h>\n'
+ '#include <stdlib.h>\n'
+ '#include <string.h>\n'
+ '#include <assert.h>\n'
+ '#include <event.h>\n\n' )
+
+ for statement in cppdirect:
+ pre += '%s\n' % statement
+
+ pre += '\n#include "%s"\n\n' % header_file
+
+ pre += 'void event_err(int eval, const char *fmt, ...);\n'
+ pre += 'void event_warn(const char *fmt, ...);\n'
+ pre += 'void event_errx(int eval, const char *fmt, ...);\n'
+ pre += 'void event_warnx(const char *fmt, ...);\n\n'
+
+ return pre
+
+def main(argv):
+ if len(argv) < 2 or not argv[1]:
+ print >>sys.stderr, 'Need RPC description file as first argument.'
+ sys.exit(1)
+
+ filename = argv[1]
+
+ ext = filename.split('.')[-1]
+ if ext != 'rpc':
+ print >>sys.stderr, 'Unrecognized file extension: %s' % ext
+ sys.exit(1)
+
+ print >>sys.stderr, 'Reading \"%s\"' % filename
+
+ fp = open(filename, 'r')
+ entities = Parse(fp)
+ fp.close()
+
+ header_file = '.'.join(filename.split('.')[:-1]) + '.gen.h'
+ impl_file = '.'.join(filename.split('.')[:-1]) + '.gen.c'
+
+ print >>sys.stderr, '... creating "%s"' % header_file
+ header_fp = open(header_file, 'w')
+ print >>header_fp, HeaderPreamble(filename)
+
+ # Create forward declarations: allows other structs to reference
+ # each other
+ for entry in entities:
+ entry.PrintForwardDeclaration(header_fp)
+ print >>header_fp, ''
+
+ for entry in entities:
+ entry.PrintTags(header_fp)
+ entry.PrintDeclaration(header_fp)
+ print >>header_fp, HeaderPostamble(filename)
+ header_fp.close()
+
+ print >>sys.stderr, '... creating "%s"' % impl_file
+ impl_fp = open(impl_file, 'w')
+ print >>impl_fp, BodyPreamble(filename)
+ for entry in entities:
+ entry.PrintCode(impl_fp)
+ impl_fp.close()
+
+if __name__ == '__main__':
+ main(sys.argv)
diff --git a/libevent/event_tagging.c b/libevent/event_tagging.c
new file mode 100644
index 00000000000..d436e3fd65b
--- /dev/null
+++ b/libevent/event_tagging.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2003, 2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#else
+#include <sys/ioctl.h>
+#endif
+
+#include <sys/queue.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef WIN32
+#include <syslog.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "event.h"
+#include "evutil.h"
+#include "log.h"
+
+int evtag_decode_int(ev_uint32_t *pnumber, struct evbuffer *evbuf);
+int evtag_encode_tag(struct evbuffer *evbuf, ev_uint32_t tag);
+int evtag_decode_tag(ev_uint32_t *ptag, struct evbuffer *evbuf);
+
+static struct evbuffer *_buf; /* not thread safe */
+
+void
+evtag_init(void)
+{
+ if (_buf != NULL)
+ return;
+
+ if ((_buf = evbuffer_new()) == NULL)
+ event_err(1, "%s: malloc", __func__);
+}
+
+/*
+ * We encode integers by nibbles; the first nibble contains the number
+ * of significant nibbles - 1; this allows us to encode up to 64-bit
+ * integers. This function is byte-order independent.
+ */
+
+void
+encode_int(struct evbuffer *evbuf, ev_uint32_t number)
+{
+ int off = 1, nibbles = 0;
+ ev_uint8_t data[5];
+
+ memset(data, 0, sizeof(ev_uint32_t)+1);
+ while (number) {
+ if (off & 0x1)
+ data[off/2] = (data[off/2] & 0xf0) | (number & 0x0f);
+ else
+ data[off/2] = (data[off/2] & 0x0f) |
+ ((number & 0x0f) << 4);
+ number >>= 4;
+ off++;
+ }
+
+ if (off > 2)
+ nibbles = off - 2;
+
+ /* Off - 1 is the number of encoded nibbles */
+ data[0] = (data[0] & 0x0f) | ((nibbles & 0x0f) << 4);
+
+ evbuffer_add(evbuf, data, (off + 1) / 2);
+}
+
+/*
+ * Support variable length encoding of tags; we use the high bit in each
+ * octet as a continuation signal.
+ */
+
+int
+evtag_encode_tag(struct evbuffer *evbuf, ev_uint32_t tag)
+{
+ int bytes = 0;
+ ev_uint8_t data[5];
+
+ memset(data, 0, sizeof(data));
+ do {
+ ev_uint8_t lower = tag & 0x7f;
+ tag >>= 7;
+
+ if (tag)
+ lower |= 0x80;
+
+ data[bytes++] = lower;
+ } while (tag);
+
+ if (evbuf != NULL)
+ evbuffer_add(evbuf, data, bytes);
+
+ return (bytes);
+}
+
+static int
+decode_tag_internal(ev_uint32_t *ptag, struct evbuffer *evbuf, int dodrain)
+{
+ ev_uint32_t number = 0;
+ ev_uint8_t *data = EVBUFFER_DATA(evbuf);
+ int len = EVBUFFER_LENGTH(evbuf);
+ int count = 0, shift = 0, done = 0;
+
+ while (count++ < len) {
+ ev_uint8_t lower = *data++;
+ number |= (lower & 0x7f) << shift;
+ shift += 7;
+
+ if (!(lower & 0x80)) {
+ done = 1;
+ break;
+ }
+ }
+
+ if (!done)
+ return (-1);
+
+ if (dodrain)
+ evbuffer_drain(evbuf, count);
+
+ if (ptag != NULL)
+ *ptag = number;
+
+ return (count);
+}
+
+int
+evtag_decode_tag(ev_uint32_t *ptag, struct evbuffer *evbuf)
+{
+ return (decode_tag_internal(ptag, evbuf, 1 /* dodrain */));
+}
+
+/*
+ * Marshal a data type, the general format is as follows:
+ *
+ * tag number: one byte; length: var bytes; payload: var bytes
+ */
+
+void
+evtag_marshal(struct evbuffer *evbuf, ev_uint32_t tag,
+ const void *data, ev_uint32_t len)
+{
+ evtag_encode_tag(evbuf, tag);
+ encode_int(evbuf, len);
+ evbuffer_add(evbuf, (void *)data, len);
+}
+
+/* Marshaling for integers */
+void
+evtag_marshal_int(struct evbuffer *evbuf, ev_uint32_t tag, ev_uint32_t integer)
+{
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ encode_int(_buf, integer);
+
+ evtag_encode_tag(evbuf, tag);
+ encode_int(evbuf, EVBUFFER_LENGTH(_buf));
+ evbuffer_add_buffer(evbuf, _buf);
+}
+
+void
+evtag_marshal_string(struct evbuffer *buf, ev_uint32_t tag, const char *string)
+{
+ evtag_marshal(buf, tag, string, strlen(string));
+}
+
+void
+evtag_marshal_timeval(struct evbuffer *evbuf, ev_uint32_t tag, struct timeval *tv)
+{
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ encode_int(_buf, tv->tv_sec);
+ encode_int(_buf, tv->tv_usec);
+
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf),
+ EVBUFFER_LENGTH(_buf));
+}
+
+static int
+decode_int_internal(ev_uint32_t *pnumber, struct evbuffer *evbuf, int dodrain)
+{
+ ev_uint32_t number = 0;
+ ev_uint8_t *data = EVBUFFER_DATA(evbuf);
+ int len = EVBUFFER_LENGTH(evbuf);
+ int nibbles = 0;
+
+ if (!len)
+ return (-1);
+
+ nibbles = ((data[0] & 0xf0) >> 4) + 1;
+ if (nibbles > 8 || (nibbles >> 1) + 1 > len)
+ return (-1);
+ len = (nibbles >> 1) + 1;
+
+ while (nibbles > 0) {
+ number <<= 4;
+ if (nibbles & 0x1)
+ number |= data[nibbles >> 1] & 0x0f;
+ else
+ number |= (data[nibbles >> 1] & 0xf0) >> 4;
+ nibbles--;
+ }
+
+ if (dodrain)
+ evbuffer_drain(evbuf, len);
+
+ *pnumber = number;
+
+ return (len);
+}
+
+int
+evtag_decode_int(ev_uint32_t *pnumber, struct evbuffer *evbuf)
+{
+ return (decode_int_internal(pnumber, evbuf, 1) == -1 ? -1 : 0);
+}
+
+int
+evtag_peek(struct evbuffer *evbuf, ev_uint32_t *ptag)
+{
+ return (decode_tag_internal(ptag, evbuf, 0 /* dodrain */));
+}
+
+int
+evtag_peek_length(struct evbuffer *evbuf, ev_uint32_t *plength)
+{
+ struct evbuffer tmp;
+ int res, len;
+
+ len = decode_tag_internal(NULL, evbuf, 0 /* dodrain */);
+ if (len == -1)
+ return (-1);
+
+ tmp = *evbuf;
+ tmp.buffer += len;
+ tmp.off -= len;
+
+ res = decode_int_internal(plength, &tmp, 0);
+ if (res == -1)
+ return (-1);
+
+ *plength += res + len;
+
+ return (0);
+}
+
+int
+evtag_payload_length(struct evbuffer *evbuf, ev_uint32_t *plength)
+{
+ struct evbuffer tmp;
+ int res, len;
+
+ len = decode_tag_internal(NULL, evbuf, 0 /* dodrain */);
+ if (len == -1)
+ return (-1);
+
+ tmp = *evbuf;
+ tmp.buffer += len;
+ tmp.off -= len;
+
+ res = decode_int_internal(plength, &tmp, 0);
+ if (res == -1)
+ return (-1);
+
+ return (0);
+}
+
+int
+evtag_consume(struct evbuffer *evbuf)
+{
+ ev_uint32_t len;
+ if (decode_tag_internal(NULL, evbuf, 1 /* dodrain */) == -1)
+ return (-1);
+ if (evtag_decode_int(&len, evbuf) == -1)
+ return (-1);
+ evbuffer_drain(evbuf, len);
+
+ return (0);
+}
+
+/* Reads the data type from an event buffer */
+
+int
+evtag_unmarshal(struct evbuffer *src, ev_uint32_t *ptag, struct evbuffer *dst)
+{
+ ev_uint32_t len;
+ ev_uint32_t integer;
+
+ if (decode_tag_internal(ptag, src, 1 /* dodrain */) == -1)
+ return (-1);
+ if (evtag_decode_int(&integer, src) == -1)
+ return (-1);
+ len = integer;
+
+ if (EVBUFFER_LENGTH(src) < len)
+ return (-1);
+
+ if (evbuffer_add(dst, EVBUFFER_DATA(src), len) == -1)
+ return (-1);
+
+ evbuffer_drain(src, len);
+
+ return (len);
+}
+
+/* Marshaling for integers */
+
+int
+evtag_unmarshal_int(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ ev_uint32_t *pinteger)
+{
+ ev_uint32_t tag;
+ ev_uint32_t len;
+ ev_uint32_t integer;
+
+ if (decode_tag_internal(&tag, evbuf, 1 /* dodrain */) == -1)
+ return (-1);
+ if (need_tag != tag)
+ return (-1);
+ if (evtag_decode_int(&integer, evbuf) == -1)
+ return (-1);
+ len = integer;
+
+ if (EVBUFFER_LENGTH(evbuf) < len)
+ return (-1);
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ if (evbuffer_add(_buf, EVBUFFER_DATA(evbuf), len) == -1)
+ return (-1);
+
+ evbuffer_drain(evbuf, len);
+
+ return (evtag_decode_int(pinteger, _buf));
+}
+
+/* Unmarshal a fixed length tag */
+
+int
+evtag_unmarshal_fixed(struct evbuffer *src, ev_uint32_t need_tag, void *data,
+ size_t len)
+{
+ ev_uint32_t tag;
+
+ /* Initialize this event buffer so that we can read into it */
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ /* Now unmarshal a tag and check that it matches the tag we want */
+ if (evtag_unmarshal(src, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ if (EVBUFFER_LENGTH(_buf) != len)
+ return (-1);
+
+ memcpy(data, EVBUFFER_DATA(_buf), len);
+ return (0);
+}
+
+int
+evtag_unmarshal_string(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ char **pstring)
+{
+ ev_uint32_t tag;
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ if (evtag_unmarshal(evbuf, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ *pstring = calloc(EVBUFFER_LENGTH(_buf) + 1, 1);
+ if (*pstring == NULL)
+ event_err(1, "%s: calloc", __func__);
+ evbuffer_remove(_buf, *pstring, EVBUFFER_LENGTH(_buf));
+
+ return (0);
+}
+
+int
+evtag_unmarshal_timeval(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ struct timeval *ptv)
+{
+ ev_uint32_t tag;
+ ev_uint32_t integer;
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ if (evtag_unmarshal(evbuf, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ if (evtag_decode_int(&integer, _buf) == -1)
+ return (-1);
+ ptv->tv_sec = integer;
+ if (evtag_decode_int(&integer, _buf) == -1)
+ return (-1);
+ ptv->tv_usec = integer;
+
+ return (0);
+}
diff --git a/libevent/evhttp.h b/libevent/evhttp.h
new file mode 100644
index 00000000000..99d16a2f47a
--- /dev/null
+++ b/libevent/evhttp.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVHTTP_H_
+#define _EVHTTP_H_
+
+#include <event.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+/** @file evhttp.h
+ *
+ * Basic support for HTTP serving.
+ *
+ * As libevent is a library for dealing with event notification and most
+ * interesting applications are networked today, I have often found the
+ * need to write HTTP code. The following prototypes and definitions provide
+ * an application with a minimal interface for making HTTP requests and for
+ * creating a very simple HTTP server.
+ */
+
+/* Response codes */
+#define HTTP_OK 200
+#define HTTP_NOCONTENT 204
+#define HTTP_MOVEPERM 301
+#define HTTP_MOVETEMP 302
+#define HTTP_NOTMODIFIED 304
+#define HTTP_BADREQUEST 400
+#define HTTP_NOTFOUND 404
+#define HTTP_SERVUNAVAIL 503
+
+struct evhttp;
+struct evhttp_request;
+struct evkeyvalq;
+
+/** Create a new HTTP server
+ *
+ * @param base (optional) the event base to receive the HTTP events
+ * @return a pointer to a newly initialized evhttp server structure
+ */
+struct evhttp *evhttp_new(struct event_base *base);
+
+/**
+ * Binds an HTTP server on the specified address and port.
+ *
+ * Can be called multiple times to bind the same http server
+ * to multiple different ports.
+ *
+ * @param http a pointer to an evhttp object
+ * @param address a string containing the IP address to listen(2) on
+ * @param port the port number to listen on
+ * @return 0 on success, -1 on failure.
+ * @see evhttp_free()
+ */
+int evhttp_bind_socket(struct evhttp *http, const char *address, u_short port);
+
+/**
+ * Makes an HTTP server accept connections on the specified socket
+ *
+ * This may be useful to create a socket and then fork multiple instances
+ * of an http server, or when a socket has been communicated via file
+ * descriptor passing in situations where an http server does not have
+ * permissions to bind to a low-numbered port.
+ *
+ * Can be called multiple times to have the http server listen to
+ * multiple different sockets.
+ *
+ * @param http a pointer to an evhttp object
+ * @param fd a socket fd that is ready for accepting connections
+ * @return 0 on success, -1 on failure.
+ * @see evhttp_free(), evhttp_bind_socket()
+ */
+int evhttp_accept_socket(struct evhttp *http, int fd);
+
+/**
+ * Free the previously created HTTP server.
+ *
+ * Works only if no requests are currently being served.
+ *
+ * @param http the evhttp server object to be freed
+ * @see evhttp_start()
+ */
+void evhttp_free(struct evhttp* http);
+
+/** Set a callback for a specified URI */
+void evhttp_set_cb(struct evhttp *, const char *,
+ void (*)(struct evhttp_request *, void *), void *);
+
+/** Removes the callback for a specified URI */
+int evhttp_del_cb(struct evhttp *, const char *);
+
+/** Set a callback for all requests that are not caught by specific callbacks
+ */
+void evhttp_set_gencb(struct evhttp *,
+ void (*)(struct evhttp_request *, void *), void *);
+
+/**
+ * Set the timeout for an HTTP request.
+ *
+ * @param http an evhttp object
+ * @param timeout_in_secs the timeout, in seconds
+ */
+void evhttp_set_timeout(struct evhttp *, int timeout_in_secs);
+
+/* Request/Response functionality */
+
+/**
+ * Send an HTML error message to the client.
+ *
+ * @param req a request object
+ * @param error the HTTP error code
+ * @param reason a brief explanation of the error
+ */
+void evhttp_send_error(struct evhttp_request *req, int error,
+ const char *reason);
+
+/**
+ * Send an HTML reply to the client.
+ *
+ * @param req a request object
+ * @param code the HTTP response code to send
+ * @param reason a brief message to send with the response code
+ * @param databuf the body of the response
+ */
+void evhttp_send_reply(struct evhttp_request *req, int code,
+ const char *reason, struct evbuffer *databuf);
+
+/* Low-level response interface, for streaming/chunked replies */
+void evhttp_send_reply_start(struct evhttp_request *, int, const char *);
+void evhttp_send_reply_chunk(struct evhttp_request *, struct evbuffer *);
+void evhttp_send_reply_end(struct evhttp_request *);
+
+/**
+ * Start an HTTP server on the specified address and port
+ *
+ * DEPRECATED: it does not allow an event base to be specified
+ *
+ * @param address the address to which the HTTP server should be bound
+ * @param port the port number on which the HTTP server should listen
+ * @return a struct evhttp object
+ */
+struct evhttp *evhttp_start(const char *address, u_short port);
+
+/*
+ * Interfaces for making requests
+ */
+enum evhttp_cmd_type { EVHTTP_REQ_GET, EVHTTP_REQ_POST, EVHTTP_REQ_HEAD };
+
+enum evhttp_request_kind { EVHTTP_REQUEST, EVHTTP_RESPONSE };
+
+/**
+ * the request structure that a server receives.
+ * WARNING: expect this structure to change. I will try to provide
+ * reasonable accessors.
+ */
+struct evhttp_request {
+#if defined(TAILQ_ENTRY)
+ TAILQ_ENTRY(evhttp_request) next;
+#else
+struct {
+ struct evhttp_request *tqe_next;
+ struct evhttp_request **tqe_prev;
+} next;
+#endif
+
+ /* the connection object that this request belongs to */
+ struct evhttp_connection *evcon;
+ int flags;
+#define EVHTTP_REQ_OWN_CONNECTION 0x0001
+#define EVHTTP_PROXY_REQUEST 0x0002
+
+ struct evkeyvalq *input_headers;
+ struct evkeyvalq *output_headers;
+
+ /* address of the remote host and the port connection came from */
+ char *remote_host;
+ u_short remote_port;
+
+ enum evhttp_request_kind kind;
+ enum evhttp_cmd_type type;
+
+ char *uri; /* uri after HTTP request was parsed */
+
+ char major; /* HTTP Major number */
+ char minor; /* HTTP Minor number */
+
+ int response_code; /* HTTP Response code */
+ char *response_code_line; /* Readable response */
+
+ struct evbuffer *input_buffer; /* read data */
+ ev_int64_t ntoread;
+ int chunked;
+
+ struct evbuffer *output_buffer; /* outgoing post or data */
+
+ /* Callback */
+ void (*cb)(struct evhttp_request *, void *);
+ void *cb_arg;
+
+ /*
+ * Chunked data callback - call for each completed chunk if
+ * specified. If not specified, all the data is delivered via
+ * the regular callback.
+ */
+ void (*chunk_cb)(struct evhttp_request *, void *);
+};
+
+/**
+ * Creates a new request object that needs to be filled in with the request
+ * parameters. The callback is executed when the request completed or an
+ * error occurred.
+ */
+struct evhttp_request *evhttp_request_new(
+ void (*cb)(struct evhttp_request *, void *), void *arg);
+
+/** enable delivery of chunks to requestor */
+void evhttp_request_set_chunked_cb(struct evhttp_request *,
+ void (*cb)(struct evhttp_request *, void *));
+
+/** Frees the request object and removes associated events. */
+void evhttp_request_free(struct evhttp_request *req);
+
+/**
+ * A connection object that can be used to for making HTTP requests. The
+ * connection object tries to establish the connection when it is given an
+ * http request object.
+ */
+struct evhttp_connection *evhttp_connection_new(
+ const char *address, unsigned short port);
+
+/** Frees an http connection */
+void evhttp_connection_free(struct evhttp_connection *evcon);
+
+/** sets the ip address from which http connections are made */
+void evhttp_connection_set_local_address(struct evhttp_connection *evcon,
+ const char *address);
+
+/** sets the local port from which http connections are made */
+void evhttp_connection_set_local_port(struct evhttp_connection *evcon,
+ unsigned short port);
+
+/** Sets the timeout for events related to this connection */
+void evhttp_connection_set_timeout(struct evhttp_connection *evcon,
+ int timeout_in_secs);
+
+/** Sets the retry limit for this connection - -1 repeats indefinitely */
+void evhttp_connection_set_retries(struct evhttp_connection *evcon,
+ int retry_max);
+
+/** Set a callback for connection close. */
+void evhttp_connection_set_closecb(struct evhttp_connection *evcon,
+ void (*)(struct evhttp_connection *, void *), void *);
+
+/**
+ * Associates an event base with the connection - can only be called
+ * on a freshly created connection object that has not been used yet.
+ */
+void evhttp_connection_set_base(struct evhttp_connection *evcon,
+ struct event_base *base);
+
+/** Get the remote address and port associated with this connection. */
+void evhttp_connection_get_peer(struct evhttp_connection *evcon,
+ char **address, u_short *port);
+
+/** The connection gets ownership of the request */
+int evhttp_make_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req,
+ enum evhttp_cmd_type type, const char *uri);
+
+const char *evhttp_request_uri(struct evhttp_request *req);
+
+/* Interfaces for dealing with HTTP headers */
+
+const char *evhttp_find_header(const struct evkeyvalq *, const char *);
+int evhttp_remove_header(struct evkeyvalq *, const char *);
+int evhttp_add_header(struct evkeyvalq *, const char *, const char *);
+void evhttp_clear_headers(struct evkeyvalq *);
+
+/* Miscellaneous utility functions */
+
+
+/**
+ Helper function to encode a URI.
+
+ The returned string must be freed by the caller.
+
+ @param uri an unencoded URI
+ @return a newly allocated URI-encoded string
+ */
+char *evhttp_encode_uri(const char *uri);
+
+
+/**
+ Helper function to decode a URI.
+
+ The returned string must be freed by the caller.
+
+ @param uri an encoded URI
+ @return a newly allocated unencoded URI
+ */
+char *evhttp_decode_uri(const char *uri);
+
+
+/**
+ * Helper function to parse out arguments in a query.
+ *
+ * Parsing a uri like
+ *
+ * http://foo.com/?q=test&s=some+thing
+ *
+ * will result in two entries in the key value queue.
+ *
+ * The first entry is: key="q", value="test"
+ * The second entry is: key="s", value="some thing"
+ *
+ * @param uri the request URI
+ * @param headers the head of the evkeyval queue
+ */
+void evhttp_parse_query(const char *uri, struct evkeyvalq *headers);
+
+
+/**
+ * Escape HTML character entities in a string.
+ *
+ * Replaces <, >, ", ' and & with &lt;, &gt;, &quot;,
+ * &#039; and &amp; correspondingly.
+ *
+ * The returned string needs to be freed by the caller.
+ *
+ * @param html an unescaped HTML string
+ * @return an escaped HTML string
+ */
+char *evhttp_htmlescape(const char *html);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVHTTP_H_ */
diff --git a/libevent/evport.c b/libevent/evport.c
new file mode 100644
index 00000000000..dae6900cc10
--- /dev/null
+++ b/libevent/evport.c
@@ -0,0 +1,513 @@
+/*
+ * Submitted by David Pacheco (dp.spambait@gmail.com)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2007 Sun Microsystems. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
+ * This implementation is loosely modeled after the one used for select(2) (in
+ * select.c).
+ *
+ * The outstanding events are tracked in a data structure called evport_data.
+ * Each entry in the ed_fds array corresponds to a file descriptor, and contains
+ * pointers to the read and write events that correspond to that fd. (That is,
+ * when the file is readable, the "read" event should handle it, etc.)
+ *
+ * evport_add and evport_del update this data structure. evport_dispatch uses it
+ * to determine where to callback when an event occurs (which it gets from
+ * port_getn).
+ *
+ * Helper functions are used: grow() grows the file descriptor array as
+ * necessary when large fd's come in. reassociate() takes care of maintaining
+ * the proper file-descriptor/event-port associations.
+ *
+ * As in the select(2) implementation, signals are handled by evsignal.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/time.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <poll.h>
+#include <port.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "log.h"
+#include "evsignal.h"
+
+
+/*
+ * Default value for ed_nevents, which is the maximum file descriptor number we
+ * can handle. If an event comes in for a file descriptor F > nevents, we will
+ * grow the array of file descriptors, doubling its size.
+ */
+#define DEFAULT_NFDS 16
+
+
+/*
+ * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on
+ * any particular call. You can speed things up by increasing this, but it will
+ * (obviously) require more memory.
+ */
+#define EVENTS_PER_GETN 8
+
+/*
+ * Per-file-descriptor information about what events we're subscribed to. These
+ * fields are NULL if no event is subscribed to either of them.
+ */
+
+struct fd_info {
+	struct event* fdi_revt; /* the event responsible for the "read" */
+	struct event* fdi_wevt; /* the event responsible for the "write" */
+	/* both may be set at once: one fd can carry a read and a write event */
+};
+
+#define FDI_HAS_READ(fdi) ((fdi)->fdi_revt != NULL)
+#define FDI_HAS_WRITE(fdi) ((fdi)->fdi_wevt != NULL)
+#define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
+#define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \
+ (FDI_HAS_WRITE(fdi) ? POLLOUT : 0)
+
+struct evport_data {
+	int ed_port; /* event port for system events */
+	int ed_nevents; /* number of allocated fdi's */
+	struct fd_info *ed_fds; /* allocated fdi table */
+	/* fdi's that we need to reassoc */
+	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events; -1 = unused slot */
+};
+
+static void* evport_init (struct event_base *);
+static int evport_add (void *, struct event *);
+static int evport_del (void *, struct event *);
+static int evport_dispatch (struct event_base *, void *, struct timeval *);
+static void evport_dealloc (struct event_base *, void *);
+
+/* Backend vtable; field order presumably matches struct eventop in
+ * event-internal.h — name, init, add, del, dispatch, dealloc, need_reinit. */
+const struct eventop evportops = {
+	"evport",
+	evport_init,
+	evport_add,
+	evport_del,
+	evport_dispatch,
+	evport_dealloc,
+	1 /* need reinit */
+};
+
+/*
+ * Initialize the event port implementation.
+ */
+
+static void*
+evport_init(struct event_base *base)
+{
+	struct evport_data *evpd;
+	int i;
+	/*
+	 * Disable event ports when this environment variable is set
+	 */
+	if (getenv("EVENT_NOEVPORT"))
+		return (NULL);
+
+	/* a NULL return disables this backend */
+	if (!(evpd = calloc(1, sizeof(struct evport_data))))
+		return (NULL);
+
+	if ((evpd->ed_port = port_create()) == -1) {
+		free(evpd);
+		return (NULL);
+	}
+
+	/*
+	 * Initialize file descriptor structure
+	 */
+	evpd->ed_fds = calloc(DEFAULT_NFDS, sizeof(struct fd_info));
+	if (evpd->ed_fds == NULL) {
+		close(evpd->ed_port);
+		free(evpd);
+		return (NULL);
+	}
+	evpd->ed_nevents = DEFAULT_NFDS;
+	/* -1 marks an unused slot in the pending-reassociation list */
+	for (i = 0; i < EVENTS_PER_GETN; i++)
+		evpd->ed_pending[i] = -1;
+
+	evsignal_init(base);
+
+	return (evpd);
+}
+
+#ifdef CHECK_INVARIANTS
+/*
+ * Checks some basic properties about the evport_data structure. Because it
+ * checks all file descriptors, this function can be expensive when the maximum
+ * file descriptor ever used is rather large.
+ */
+
+static void
+check_evportop(struct evport_data *evpd)
+{
+	assert(evpd);
+	assert(evpd->ed_nevents > 0);
+	assert(evpd->ed_port > 0);
+	/* NOTE(review): pointer compared against 0 — effectively a non-NULL check */
+	assert(evpd->ed_fds > 0);
+
+	/*
+	 * Verify the integrity of the fd_info struct as well as the events to
+	 * which it points (at least, that they're valid references and correct
+	 * for their position in the structure).
+	 */
+	int i;
+	for (i = 0; i < evpd->ed_nevents; ++i) {
+		struct event *ev;
+		struct fd_info *fdi;
+
+		fdi = &evpd->ed_fds[i];
+		if ((ev = fdi->fdi_revt) != NULL) {
+			assert(ev->ev_fd == i);
+		}
+		if ((ev = fdi->fdi_wevt) != NULL) {
+			assert(ev->ev_fd == i);
+		}
+	}
+}
+
+/*
+ * Verifies very basic integrity of a given port_event.
+ */
+static void
+check_event(port_event_t* pevt)
+{
+	/*
+	 * We've only registered for PORT_SOURCE_FD events. The only
+	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
+	 * but since we're not using port_alert either, we can assume
+	 * PORT_SOURCE_FD.
+	 */
+	assert(pevt->portev_source == PORT_SOURCE_FD);
+	/* reassociate() always passes NULL as the port_associate user cookie */
+	assert(pevt->portev_user == NULL);
+}
+
+#else
+#define check_evportop(epop)
+#define check_event(pevt)
+#endif /* CHECK_INVARIANTS */
+
+/*
+ * Doubles the size of the allocated file descriptor array.
+ */
+static int
+grow(struct evport_data *epdp, int factor)
+{
+	struct fd_info *tmp;
+	int oldsize = epdp->ed_nevents;
+	int newsize = factor * oldsize;
+	assert(factor > 1);
+
+	check_evportop(epdp);
+
+	/* realloc into tmp first so ed_fds stays valid if it fails */
+	tmp = realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
+	if (NULL == tmp)
+		return -1;
+	epdp->ed_fds = tmp;
+	/* zero only the newly added tail of the table */
+	memset((char*) (epdp->ed_fds + oldsize), 0,
+	    (newsize - oldsize)*sizeof(struct fd_info));
+	epdp->ed_nevents = newsize;
+
+	check_evportop(epdp);
+
+	return 0;
+}
+
+
+/*
+ * (Re)associates the given file descriptor with the event port. The OS events
+ * are specified (implicitly) from the fd_info struct.
+ */
+static int
+reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
+{
+	int sysevents = FDI_TO_SYSEVENTS(fdip);
+
+	/* nothing to associate when neither read nor write is wanted */
+	if (sysevents != 0) {
+		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
+			    fd, sysevents, NULL) == -1) {
+			event_warn("port_associate");
+			return (-1);
+		}
+	}
+
+	check_evportop(epdp);
+
+	return (0);
+}
+
+/*
+ * Main event loop - polls port_getn for some number of events, and processes
+ * them.
+ */
+
+static int
+evport_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+	int i, res;
+	struct evport_data *epdp = arg;
+	port_event_t pevtlist[EVENTS_PER_GETN];
+
+	/*
+	 * port_getn will block until it has at least nevents events. It will
+	 * also return how many it's given us (which may be more than we asked
+	 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
+	 * nevents.
+	 */
+	int nevents = 1;
+
+	/*
+	 * We have to convert a struct timeval to a struct timespec
+	 * (only difference is nanoseconds vs. microseconds). If no time-based
+	 * events are active, we should wait for I/O (and tv == NULL).
+	 */
+	struct timespec ts;
+	struct timespec *ts_p = NULL;
+	if (tv != NULL) {
+		ts.tv_sec = tv->tv_sec;
+		ts.tv_nsec = tv->tv_usec * 1000;
+		ts_p = &ts;
+	}
+
+	/*
+	 * Before doing anything else, we need to reassociate the events we hit
+	 * last time which need reassociation. See comment at the end of the
+	 * loop below.
+	 */
+	for (i = 0; i < EVENTS_PER_GETN; ++i) {
+		struct fd_info *fdi = NULL;
+		if (epdp->ed_pending[i] != -1) {
+			fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
+		}
+
+		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
+			int fd = FDI_HAS_READ(fdi) ? fdi->fdi_revt->ev_fd :
+			    fdi->fdi_wevt->ev_fd;
+			reassociate(epdp, fdi, fd);
+			epdp->ed_pending[i] = -1;
+		}
+	}
+
+	/* NOTE(review): nevents is int but port_getn takes a uint_t *;
+	 * the cast assumes matching width — confirm on target platforms */
+	if ((res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
+		    (unsigned int *) &nevents, ts_p)) == -1) {
+		if (errno == EINTR || errno == EAGAIN) {
+			evsignal_process(base);
+			return (0);
+		} else if (errno == ETIME) {
+			/* timed out; events already fetched (nevents > 0)
+			 * still fall through and get processed below */
+			if (nevents == 0)
+				return (0);
+		} else {
+			event_warn("port_getn");
+			return (-1);
+		}
+	} else if (base->sig.evsignal_caught) {
+		evsignal_process(base);
+	}
+
+	event_debug(("%s: port_getn reports %d events", __func__, nevents));
+
+	for (i = 0; i < nevents; ++i) {
+		struct event *ev;
+		struct fd_info *fdi;
+		port_event_t *pevt = &pevtlist[i];
+		int fd = (int) pevt->portev_object;
+
+		check_evportop(epdp);
+		check_event(pevt);
+		/* remember the fd: the next dispatch pass reassociates it,
+		 * since port events are one-shot */
+		epdp->ed_pending[i] = fd;
+
+		/*
+		 * Figure out what kind of event it was
+		 * (because we have to pass this to the callback)
+		 */
+		res = 0;
+		if (pevt->portev_events & POLLIN)
+			res |= EV_READ;
+		if (pevt->portev_events & POLLOUT)
+			res |= EV_WRITE;
+
+		assert(epdp->ed_nevents > fd);
+		fdi = &(epdp->ed_fds[fd]);
+
+		/*
+		 * We now check for each of the possible events (READ
+		 * or WRITE). Then, we activate the event (which will
+		 * cause its callback to be executed).
+		 */
+
+		if ((res & EV_READ) && ((ev = fdi->fdi_revt) != NULL)) {
+			event_active(ev, res, 1);
+		}
+
+		if ((res & EV_WRITE) && ((ev = fdi->fdi_wevt) != NULL)) {
+			event_active(ev, res, 1);
+		}
+	} /* end of all events gotten */
+
+	check_evportop(epdp);
+
+	return (0);
+}
+
+
+/*
+ * Adds the given event (so that you will be notified when it happens via
+ * the callback function).
+ */
+
+static int
+evport_add(void *arg, struct event *ev)
+{
+	struct evport_data *evpd = arg;
+	struct fd_info *fdi;
+	int factor;
+
+	check_evportop(evpd);
+
+	/*
+	 * Delegate, if it's not ours to handle.
+	 */
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_add(ev));
+
+	/*
+	 * If necessary, grow the file descriptor info table
+	 */
+
+	/* factor is the total multiplier needed so ev_fd fits in the table */
+	factor = 1;
+	while (ev->ev_fd >= factor * evpd->ed_nevents)
+		factor *= 2;
+
+	if (factor > 1) {
+		if (-1 == grow(evpd, factor)) {
+			return (-1);
+		}
+	}
+
+	fdi = &evpd->ed_fds[ev->ev_fd];
+	if (ev->ev_events & EV_READ)
+		fdi->fdi_revt = ev;
+	if (ev->ev_events & EV_WRITE)
+		fdi->fdi_wevt = ev;
+
+	return reassociate(evpd, fdi, ev->ev_fd);
+}
+
+/*
+ * Removes the given event from the list of events to wait for.
+ */
+
+static int
+evport_del(void *arg, struct event *ev)
+{
+	struct evport_data *evpd = arg;
+	struct fd_info *fdi;
+	int i;
+	int associated = 1;
+
+	check_evportop(evpd);
+
+	/*
+	 * Delegate, if it's not ours to handle
+	 */
+	if (ev->ev_events & EV_SIGNAL) {
+		return (evsignal_del(ev));
+	}
+
+	/*
+	 * Valid slots are 0 .. ed_nevents-1.  The previous test
+	 * (ed_nevents < ev_fd) let ev_fd == ed_nevents through and
+	 * indexed one entry past the end of ed_fds below.
+	 */
+	if (ev->ev_fd >= evpd->ed_nevents) {
+		return (-1);
+	}
+
+	/* an fd still on the pending list is no longer associated */
+	for (i = 0; i < EVENTS_PER_GETN; ++i) {
+		if (evpd->ed_pending[i] == ev->ev_fd) {
+			associated = 0;
+			break;
+		}
+	}
+
+	fdi = &evpd->ed_fds[ev->ev_fd];
+	if (ev->ev_events & EV_READ)
+		fdi->fdi_revt = NULL;
+	if (ev->ev_events & EV_WRITE)
+		fdi->fdi_wevt = NULL;
+
+	if (associated) {
+		if (!FDI_HAS_EVENTS(fdi) &&
+		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD,
+		    ev->ev_fd) == -1) {
+			/*
+			 * Ignore EBADFD: the fd could have been closed
+			 * before event_del() was called.
+			 */
+			if (errno != EBADFD) {
+				event_warn("port_dissociate");
+				return (-1);
+			}
+		} else {
+			if (FDI_HAS_EVENTS(fdi)) {
+				return (reassociate(evpd, fdi, ev->ev_fd));
+			}
+		}
+	} else {
+		/* both directions gone: drop the pending slot */
+		if (fdi->fdi_revt == NULL && fdi->fdi_wevt == NULL) {
+			evpd->ed_pending[i] = -1;
+		}
+	}
+	return 0;
+}
+
+
+static void
+evport_dealloc(struct event_base *base, void *arg)
+{
+	/* Tear down the backend: signal state, the port fd and the fdi table. */
+	struct evport_data *evpd = arg;
+
+	evsignal_dealloc(base);
+
+	close(evpd->ed_port);
+
+	if (evpd->ed_fds)
+		free(evpd->ed_fds);
+	free(evpd);
+}
diff --git a/libevent/evrpc-internal.h b/libevent/evrpc-internal.h
new file mode 100644
index 00000000000..c900f959f97
--- /dev/null
+++ b/libevent/evrpc-internal.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVRPC_INTERNAL_H_
+#define _EVRPC_INTERNAL_H_
+
+#include "http-internal.h"
+
+struct evrpc;
+
+#define EVRPC_URI_PREFIX "/.rpc."
+
+/* One registered input/output filter; lives on an evrpc_hook_list. */
+struct evrpc_hook {
+	TAILQ_ENTRY(evrpc_hook) (next);
+
+	/* returns -1; if the rpc should be aborted, is allowed to rewrite */
+	int (*process)(struct evhttp_request *, struct evbuffer *, void *);
+	void *process_arg;
+};
+
+TAILQ_HEAD(evrpc_hook_list, evrpc_hook);
+
+/*
+ * this is shared between the base and the pool, so that we can reuse
+ * the hook adding functions; we alias both evrpc_pool and evrpc_base
+ * to this common structure.
+ */
+/* Common first member of evrpc_base and evrpc_pool, so hook functions
+ * can operate on either via a void pointer. */
+struct _evrpc_hooks {
+	/* hooks for processing outbound and inbound rpcs */
+	struct evrpc_hook_list in_hooks;
+	struct evrpc_hook_list out_hooks;
+};
+
+#define input_hooks common.in_hooks
+#define output_hooks common.out_hooks
+
+/* Server-side RPC registry.  'common' MUST stay the first member: the
+ * hook functions alias this struct through struct _evrpc_hooks. */
+struct evrpc_base {
+	struct _evrpc_hooks common;
+
+	/* the HTTP server under which we register our RPC calls */
+	struct evhttp* http_server;
+
+	/* a list of all RPCs registered with us */
+	TAILQ_HEAD(evrpc_list, evrpc) registered_rpcs;
+};
+
+struct evrpc_req_generic;
+void evrpc_reqstate_free(struct evrpc_req_generic* rpc_state);
+
+/* A pool for holding evhttp_connection objects */
+/* A pool for holding evhttp_connection objects */
+struct evrpc_pool {
+	/* must remain the first member; aliased via struct _evrpc_hooks */
+	struct _evrpc_hooks common;
+
+	struct event_base *base;
+
+	struct evconq connections;
+
+	/* timeout in seconds applied to new connections; -1 = unset */
+	int timeout;
+
+	/* requests waiting for a free connection */
+	TAILQ_HEAD(evrpc_requestq, evrpc_request_wrapper) requests;
+};
+
+
+#endif /* _EVRPC_INTERNAL_H_ */
diff --git a/libevent/evrpc.c b/libevent/evrpc.c
new file mode 100644
index 00000000000..8b3b071d0bf
--- /dev/null
+++ b/libevent/evrpc.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+#include <sys/types.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef WIN32
+#include <unistd.h>
+#endif
+#ifndef HAVE_TAILQFOREACH
+#include <event-internal.h>
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+
+#include "event.h"
+#include "evrpc.h"
+#include "evrpc-internal.h"
+#include "evhttp.h"
+#include "evutil.h"
+#include "log.h"
+
+/* Create an RPC base bound to the given HTTP server; NULL on OOM. */
+struct evrpc_base *
+evrpc_init(struct evhttp *http_server)
+{
+	struct evrpc_base* base = calloc(1, sizeof(struct evrpc_base));
+	if (base == NULL)
+		return (NULL);
+
+	/* we rely on the tagging sub system */
+	evtag_init();
+
+	TAILQ_INIT(&base->registered_rpcs);
+	TAILQ_INIT(&base->input_hooks);
+	TAILQ_INIT(&base->output_hooks);
+	base->http_server = http_server;
+
+	return (base);
+}
+
+/* Free an RPC base: unregister every RPC and remove all hooks. */
+void
+evrpc_free(struct evrpc_base *base)
+{
+	struct evrpc *rpc;
+	struct evrpc_hook *hook;
+	int r;
+
+	/*
+	 * The teardown calls must not live inside assert(): under NDEBUG
+	 * the assert body is compiled out, leaving the lists populated
+	 * and these loops spinning forever.  Also note that
+	 * evrpc_unregister_rpc returns 0 on success, so the old
+	 * assert(evrpc_unregister_rpc(...)) fired on every *successful*
+	 * unregister in debug builds.
+	 */
+	while ((rpc = TAILQ_FIRST(&base->registered_rpcs)) != NULL) {
+		r = evrpc_unregister_rpc(base, rpc->uri);
+		assert(r == 0);
+	}
+	while ((hook = TAILQ_FIRST(&base->input_hooks)) != NULL) {
+		r = evrpc_remove_hook(base, EVRPC_INPUT, hook);
+		assert(r != 0);
+	}
+	while ((hook = TAILQ_FIRST(&base->output_hooks)) != NULL) {
+		r = evrpc_remove_hook(base, EVRPC_OUTPUT, hook);
+		assert(r != 0);
+	}
+	free(base);
+}
+
+/* Add an input or output hook to a base or pool; returns the handle
+ * to pass to evrpc_remove_hook. */
+void *
+evrpc_add_hook(void *vbase,
+    enum EVRPC_HOOK_TYPE hook_type,
+    int (*cb)(struct evhttp_request *, struct evbuffer *, void *),
+    void *cb_arg)
+{
+	struct _evrpc_hooks *base = vbase;
+	struct evrpc_hook_list *head = NULL;
+	struct evrpc_hook *hook = NULL;
+	switch (hook_type) {
+	case EVRPC_INPUT:
+		head = &base->in_hooks;
+		break;
+	case EVRPC_OUTPUT:
+		head = &base->out_hooks;
+		break;
+	default:
+		assert(hook_type == EVRPC_INPUT || hook_type == EVRPC_OUTPUT);
+	}
+
+	/* NOTE(review): allocation failure is handled by assert, not by
+	 * returning NULL — callers cannot recover from OOM here */
+	hook = calloc(1, sizeof(struct evrpc_hook));
+	assert(hook != NULL);
+
+	hook->process = cb;
+	hook->process_arg = cb_arg;
+	TAILQ_INSERT_TAIL(head, hook, next);
+
+	return (hook);
+}
+
+/* Remove and free the hook 'handle' from 'head'.
+ * Returns 1 if found and freed, 0 otherwise. */
+static int
+evrpc_remove_hook_internal(struct evrpc_hook_list *head, void *handle)
+{
+	struct evrpc_hook *hook = NULL;
+	TAILQ_FOREACH(hook, head, next) {
+		if (hook == handle) {
+			TAILQ_REMOVE(head, hook, next);
+			free(hook);
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * remove the hook specified by the handle
+ */
+
+/* returns 1 on success, 0 if the handle was not found in the list */
+int
+evrpc_remove_hook(void *vbase, enum EVRPC_HOOK_TYPE hook_type, void *handle)
+{
+	struct _evrpc_hooks *base = vbase;
+	struct evrpc_hook_list *head = NULL;
+	switch (hook_type) {
+	case EVRPC_INPUT:
+		head = &base->in_hooks;
+		break;
+	case EVRPC_OUTPUT:
+		head = &base->out_hooks;
+		break;
+	default:
+		assert(hook_type == EVRPC_INPUT || hook_type == EVRPC_OUTPUT);
+	}
+
+	return (evrpc_remove_hook_internal(head, handle));
+}
+
+/* Run every hook on (req, evbuf); the first hook returning -1 aborts
+ * processing and makes this function return -1. */
+static int
+evrpc_process_hooks(struct evrpc_hook_list *head,
+    struct evhttp_request *req, struct evbuffer *evbuf)
+{
+	struct evrpc_hook *hook;
+	TAILQ_FOREACH(hook, head, next) {
+		if (hook->process(req, evbuf, hook->process_arg) == -1)
+			return (-1);
+	}
+
+	return (0);
+}
+
+static void evrpc_pool_schedule(struct evrpc_pool *pool);
+static void evrpc_request_cb(struct evhttp_request *, void *);
+void evrpc_request_done(struct evrpc_req_generic*);
+
+/*
+ * Registers a new RPC with the HTTP server. The evrpc object is expected
+ * to have been filled in via the EVRPC_REGISTER_OBJECT macro which in turn
+ * calls this function.
+ */
+
+/* Return a newly allocated string EVRPC_URI_PREFIX + uri; the caller
+ * frees it.  On OOM this terminates the process via event_err. */
+static char *
+evrpc_construct_uri(const char *uri)
+{
+	char *constructed_uri;
+	int constructed_uri_len;
+
+	constructed_uri_len = strlen(EVRPC_URI_PREFIX) + strlen(uri) + 1;
+	if ((constructed_uri = malloc(constructed_uri_len)) == NULL)
+		event_err(1, "%s: failed to register rpc at %s",
+		    __func__, uri);
+	memcpy(constructed_uri, EVRPC_URI_PREFIX, strlen(EVRPC_URI_PREFIX));
+	memcpy(constructed_uri + strlen(EVRPC_URI_PREFIX), uri, strlen(uri));
+	constructed_uri[constructed_uri_len - 1] = '\0';
+
+	return (constructed_uri);
+}
+
+int
+evrpc_register_rpc(struct evrpc_base *base, struct evrpc *rpc,
+    void (*cb)(struct evrpc_req_generic *, void *), void *cb_arg)
+{
+	/* NOTE(review): rpc->uri is expected to be filled in by the caller
+	 * (the EVRPC_REGISTER machinery) before this runs — confirm */
+	char *constructed_uri = evrpc_construct_uri(rpc->uri);
+
+	rpc->base = base;
+	rpc->cb = cb;
+	rpc->cb_arg = cb_arg;
+
+	TAILQ_INSERT_TAIL(&base->registered_rpcs, rpc, next);
+
+	/* requests arriving at the prefixed URI dispatch to this rpc */
+	evhttp_set_cb(base->http_server,
+	    constructed_uri,
+	    evrpc_request_cb,
+	    rpc);
+
+	free(constructed_uri);
+
+	return (0);
+}
+
+/* Unregister the RPC called 'name'; returns 0 on success, -1 if no
+ * RPC with that name exists. */
+int
+evrpc_unregister_rpc(struct evrpc_base *base, const char *name)
+{
+	char *registered_uri = NULL;
+	struct evrpc *rpc;
+	int r;
+
+	/* find the right rpc; linear search might be slow */
+	TAILQ_FOREACH(rpc, &base->registered_rpcs, next) {
+		if (strcmp(rpc->uri, name) == 0)
+			break;
+	}
+	if (rpc == NULL) {
+		/* We did not find an RPC with this name */
+		return (-1);
+	}
+	TAILQ_REMOVE(&base->registered_rpcs, rpc, next);
+
+	/*
+	 * Construct the URI before freeing rpc->uri: callers such as
+	 * evrpc_free() pass rpc->uri itself as 'name', so building it
+	 * after the free below read freed memory.
+	 */
+	registered_uri = evrpc_construct_uri(name);
+
+	free((char *)rpc->uri);
+	free(rpc);
+
+	/*
+	 * Remove the http server callback.  The call must not sit inside
+	 * assert(): under NDEBUG it would be compiled out, leaving a
+	 * callback that points at the freed rpc.
+	 */
+	r = evhttp_del_cb(base->http_server, registered_uri);
+	assert(r == 0);
+
+	free(registered_uri);
+	return (0);
+}
+
+/* HTTP callback for a registered RPC URI: parse the POSTed request,
+ * build the reply object and hand both to the user callback. */
+static void
+evrpc_request_cb(struct evhttp_request *req, void *arg)
+{
+	struct evrpc *rpc = arg;
+	struct evrpc_req_generic *rpc_state = NULL;
+
+	/* let's verify the outside parameters */
+	if (req->type != EVHTTP_REQ_POST ||
+	    EVBUFFER_LENGTH(req->input_buffer) <= 0)
+		goto error;
+
+	/*
+	 * we might want to allow hooks to suspend the processing,
+	 * but at the moment, we assume that they just act as simple
+	 * filters.
+	 */
+	if (evrpc_process_hooks(&rpc->base->input_hooks,
+		req, req->input_buffer) == -1)
+		goto error;
+
+	rpc_state = calloc(1, sizeof(struct evrpc_req_generic));
+	if (rpc_state == NULL)
+		goto error;
+
+	/* let's check that we can parse the request */
+	rpc_state->request = rpc->request_new();
+	if (rpc_state->request == NULL)
+		goto error;
+
+	rpc_state->rpc = rpc;
+
+	if (rpc->request_unmarshal(
+		    rpc_state->request, req->input_buffer) == -1) {
+		/* we failed to parse the request; that's a bummer */
+		goto error;
+	}
+
+	/* at this point, we have a well formed request, prepare the reply */
+
+	rpc_state->reply = rpc->reply_new();
+	if (rpc_state->reply == NULL)
+		goto error;
+
+	rpc_state->http_req = req;
+	/* the user signals completion by calling rpc_state->done */
+	rpc_state->done = evrpc_request_done;
+
+	/* give the rpc to the user; they can deal with it */
+	rpc->cb(rpc_state, rpc->cb_arg);
+
+	return;
+
+error:
+	/* rpc_state may still be NULL here; evrpc_reqstate_free handles that */
+	evrpc_reqstate_free(rpc_state);
+	evhttp_send_error(req, HTTP_SERVUNAVAIL, "Service Error");
+	return;
+}
+
+/* Free an RPC request state and its request/reply objects.
+ * Safe to call with NULL. */
+void
+evrpc_reqstate_free(struct evrpc_req_generic* rpc_state)
+{
+	/* clean up all memory */
+	if (rpc_state != NULL) {
+		struct evrpc *rpc = rpc_state->rpc;
+
+		if (rpc_state->request != NULL)
+			rpc->request_free(rpc_state->request);
+		if (rpc_state->reply != NULL)
+			rpc->reply_free(rpc_state->reply);
+		free(rpc_state);
+	}
+}
+
+/* Called by the user when the reply is filled in: marshal it, run the
+ * output hooks and send the HTTP response. */
+void
+evrpc_request_done(struct evrpc_req_generic* rpc_state)
+{
+	struct evhttp_request *req = rpc_state->http_req;
+	struct evrpc *rpc = rpc_state->rpc;
+	struct evbuffer* data = NULL;
+
+	if (rpc->reply_complete(rpc_state->reply) == -1) {
+		/* the reply was not completely filled in. error out */
+		goto error;
+	}
+
+	if ((data = evbuffer_new()) == NULL) {
+		/* out of memory */
+		goto error;
+	}
+
+	/* serialize the reply */
+	rpc->reply_marshal(data, rpc_state->reply);
+
+	/* do hook based tweaks to the request */
+	if (evrpc_process_hooks(&rpc->base->output_hooks,
+		req, data) == -1)
+		goto error;
+
+	/* on success, we are going to transmit marshaled binary data */
+	if (evhttp_find_header(req->output_headers, "Content-Type") == NULL) {
+		evhttp_add_header(req->output_headers,
+		    "Content-Type", "application/octet-stream");
+	}
+
+	evhttp_send_reply(req, HTTP_OK, "OK", data);
+
+	/* data is freed on both the success and the error path */
+	evbuffer_free(data);
+
+	evrpc_reqstate_free(rpc_state);
+
+	return;
+
+error:
+	if (data != NULL)
+		evbuffer_free(data);
+	evrpc_reqstate_free(rpc_state);
+	evhttp_send_error(req, HTTP_SERVUNAVAIL, "Service Error");
+	return;
+}
+
+/* Client implementation of RPC site */
+
+static int evrpc_schedule_request(struct evhttp_connection *connection,
+ struct evrpc_request_wrapper *ctx);
+
+/* Create a connection pool for client RPCs; NULL on OOM.
+ * 'base' may be NULL — connections then keep their own event base
+ * (see evrpc_pool_add_connection). */
+struct evrpc_pool *
+evrpc_pool_new(struct event_base *base)
+{
+	struct evrpc_pool *pool = calloc(1, sizeof(struct evrpc_pool));
+	if (pool == NULL)
+		return (NULL);
+
+	TAILQ_INIT(&pool->connections);
+	TAILQ_INIT(&pool->requests);
+
+	TAILQ_INIT(&pool->input_hooks);
+	TAILQ_INIT(&pool->output_hooks);
+
+	pool->base = base;
+	/* -1 means "no pool-level timeout set" */
+	pool->timeout = -1;
+
+	return (pool);
+}
+
+/* Free a request wrapper.  Only the name and the wrapper itself are
+ * released; the request/reply objects remain owned by the caller. */
+static void
+evrpc_request_wrapper_free(struct evrpc_request_wrapper *request)
+{
+	free(request->name);
+	free(request);
+}
+
+/* Free an RPC pool: drop queued requests, free all connections and
+ * remove any remaining hooks. */
+void
+evrpc_pool_free(struct evrpc_pool *pool)
+{
+	struct evhttp_connection *connection;
+	struct evrpc_request_wrapper *request;
+	struct evrpc_hook *hook;
+	int r;
+
+	while ((request = TAILQ_FIRST(&pool->requests)) != NULL) {
+		TAILQ_REMOVE(&pool->requests, request, next);
+		/* if this gets more complicated we need our own function */
+		evrpc_request_wrapper_free(request);
+	}
+
+	while ((connection = TAILQ_FIRST(&pool->connections)) != NULL) {
+		TAILQ_REMOVE(&pool->connections, connection, next);
+		evhttp_connection_free(connection);
+	}
+
+	/*
+	 * The hook removal must not happen inside assert(): under NDEBUG
+	 * the call would be compiled out and these loops would never
+	 * terminate.  evrpc_remove_hook returns 1 on success.
+	 */
+	while ((hook = TAILQ_FIRST(&pool->input_hooks)) != NULL) {
+		r = evrpc_remove_hook(pool, EVRPC_INPUT, hook);
+		assert(r != 0);
+	}
+
+	while ((hook = TAILQ_FIRST(&pool->output_hooks)) != NULL) {
+		r = evrpc_remove_hook(pool, EVRPC_OUTPUT, hook);
+		assert(r != 0);
+	}
+
+	free(pool);
+}
+
+/*
+ * Add a connection to the RPC pool. A request scheduled on the pool
+ * may use any available connection.
+ */
+
+void
+evrpc_pool_add_connection(struct evrpc_pool *pool,
+    struct evhttp_connection *connection) {
+	/* only client connections may be pooled */
+	assert(connection->http_server == NULL);
+	TAILQ_INSERT_TAIL(&pool->connections, connection, next);
+
+	/*
+	 * associate an event base with this connection
+	 */
+	if (pool->base != NULL)
+		evhttp_connection_set_base(connection, pool->base);
+
+	/*
+	 * unless a timeout was specifically set for a connection,
+	 * the connection inherits the timeout from the pool.
+	 */
+	if (connection->timeout == -1)
+		connection->timeout = pool->timeout;
+
+	/*
+	 * if we have any requests pending, schedule them with the new
+	 * connections.
+	 */
+
+	/* only one queued request is started; the rest wait for idle
+	 * connections via evrpc_pool_schedule */
+	if (TAILQ_FIRST(&pool->requests) != NULL) {
+		struct evrpc_request_wrapper *request =
+		    TAILQ_FIRST(&pool->requests);
+		TAILQ_REMOVE(&pool->requests, request, next);
+		evrpc_schedule_request(connection, request);
+	}
+}
+
+/*
+ * Apply a new timeout (in seconds) to the pool and to every
+ * connection it currently holds.
+ */
+void
+evrpc_pool_set_timeout(struct evrpc_pool *pool, int timeout_in_secs)
+{
+	struct evhttp_connection *conn;
+
+	pool->timeout = timeout_in_secs;
+	TAILQ_FOREACH(conn, &pool->connections, next)
+		conn->timeout = timeout_in_secs;
+}
+
+
+static void evrpc_reply_done(struct evhttp_request *, void *);
+static void evrpc_request_timeout(int, short, void *);
+
+/*
+ * Finds a connection object associated with the pool that is currently
+ * idle and can be used to make a request.
+ */
+static struct evhttp_connection *
+evrpc_pool_find_connection(struct evrpc_pool *pool)
+{
+	/* a connection is idle when its request queue is empty */
+	struct evhttp_connection *idle = NULL;
+	struct evhttp_connection *conn;
+
+	TAILQ_FOREACH(conn, &pool->connections, next) {
+		if (TAILQ_FIRST(&conn->requests) == NULL) {
+			idle = conn;
+			break;
+		}
+	}
+
+	return (idle);
+}
+
+/*
+ * We assume that the ctx is no longer queued on the pool.
+ */
+static int
+evrpc_schedule_request(struct evhttp_connection *connection,
+    struct evrpc_request_wrapper *ctx)
+{
+	struct evhttp_request *req = NULL;
+	struct evrpc_pool *pool = ctx->pool;
+	struct evrpc_status status;
+	char *uri = NULL;
+	int res = 0;
+
+	if ((req = evhttp_request_new(evrpc_reply_done, ctx)) == NULL)
+		goto error;
+
+	/* serialize the request data into the output buffer */
+	ctx->request_marshal(req->output_buffer, ctx->request);
+
+	uri = evrpc_construct_uri(ctx->name);
+	if (uri == NULL)
+		goto error;
+
+	/* we need to know the connection that we might have to abort */
+	ctx->evcon = connection;
+
+	/* apply hooks to the outgoing request */
+	if (evrpc_process_hooks(&pool->output_hooks,
+		req, req->output_buffer) == -1)
+		goto error;
+
+	if (pool->timeout > 0) {
+		/*
+		 * a timeout after which the whole rpc is going to be aborted.
+		 */
+		struct timeval tv;
+		evutil_timerclear(&tv);
+		tv.tv_sec = pool->timeout;
+		evtimer_add(&ctx->ev_timeout, &tv);
+	}
+
+	/* start the request over the connection; on success the
+	 * connection takes ownership of req */
+	res = evhttp_make_request(connection, req, EVHTTP_REQ_POST, uri);
+	free(uri);
+	uri = NULL;
+
+	if (res == -1)
+		goto error;
+
+	return (0);
+
+error:
+	/* the old error path leaked 'uri' when a hook or the request
+	 * submission failed; free(NULL) is a no-op */
+	free(uri);
+	memset(&status, 0, sizeof(status));
+	status.error = EVRPC_STATUS_ERR_UNSTARTED;
+	(*ctx->cb)(&status, ctx->request, ctx->reply, ctx->cb_arg);
+	evrpc_request_wrapper_free(ctx);
+	return (-1);
+}
+
+int
+evrpc_make_request(struct evrpc_request_wrapper *ctx)
+{
+	struct evrpc_pool *pool = ctx->pool;
+
+	/* initialize the event structure for this rpc */
+	evtimer_set(&ctx->ev_timeout, evrpc_request_timeout, ctx);
+	if (pool->base != NULL)
+		event_base_set(pool->base, &ctx->ev_timeout);
+
+	/* we better have some available connections on the pool */
+	assert(TAILQ_FIRST(&pool->connections) != NULL);
+
+	/*
+	 * if no connection is available, we queue the request on the pool,
+	 * the next time a connection is empty, the rpc will be send on that.
+	 */
+	/* NOTE(review): the request is always queued first; evrpc_pool_schedule
+	 * dequeues and starts it immediately when an idle connection exists */
+	TAILQ_INSERT_TAIL(&pool->requests, ctx, next);
+
+	evrpc_pool_schedule(pool);
+
+	return (0);
+}
+
+/* HTTP completion callback for a client RPC: run input hooks,
+ * unmarshal the reply and invoke the user callback with the status. */
+static void
+evrpc_reply_done(struct evhttp_request *req, void *arg)
+{
+	struct evrpc_request_wrapper *ctx = arg;
+	struct evrpc_pool *pool = ctx->pool;
+	struct evrpc_status status;
+	int res = -1;
+
+	/* cancel any timeout we might have scheduled */
+	event_del(&ctx->ev_timeout);
+
+	memset(&status, 0, sizeof(status));
+	status.http_req = req;
+
+	/* we need to get the reply now */
+	if (req != NULL) {
+		/* apply hooks to the incoming request */
+		if (evrpc_process_hooks(&pool->input_hooks,
+			req, req->input_buffer) == -1) {
+			status.error = EVRPC_STATUS_ERR_HOOKABORTED;
+			res = -1;
+		} else {
+			res = ctx->reply_unmarshal(ctx->reply,
+			    req->input_buffer);
+			if (res == -1) {
+				status.error = EVRPC_STATUS_ERR_BADPAYLOAD;
+			}
+		}
+	} else {
+		/* req == NULL: the request never completed (treated as timeout) */
+		status.error = EVRPC_STATUS_ERR_TIMEOUT;
+	}
+
+	if (res == -1) {
+		/* clear everything that we might have written previously */
+		ctx->reply_clear(ctx->reply);
+	}
+
+	(*ctx->cb)(&status, ctx->request, ctx->reply, ctx->cb_arg);
+
+	evrpc_request_wrapper_free(ctx);
+
+	/* the http layer owns the request structure */
+
+	/* see if we can schedule another request */
+	evrpc_pool_schedule(pool);
+}
+
+/* Start the oldest queued request if an idle connection is available;
+ * otherwise the request stays queued. */
+static void
+evrpc_pool_schedule(struct evrpc_pool *pool)
+{
+	struct evrpc_request_wrapper *ctx = TAILQ_FIRST(&pool->requests);
+	struct evhttp_connection *evcon;
+
+	/* if no requests are pending, we have no work */
+	if (ctx == NULL)
+		return;
+
+	if ((evcon = evrpc_pool_find_connection(pool)) != NULL) {
+		TAILQ_REMOVE(&pool->requests, ctx, next);
+		evrpc_schedule_request(evcon, ctx);
+	}
+}
+
+/* Timer callback for an expired RPC: fail the connection, which
+ * presumably makes evrpc_reply_done run with req == NULL — confirm
+ * against evhttp_connection_fail in http.c. */
+static void
+evrpc_request_timeout(int fd, short what, void *arg)
+{
+	struct evrpc_request_wrapper *ctx = arg;
+	struct evhttp_connection *evcon = ctx->evcon;
+	assert(evcon != NULL);
+
+	evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+}
diff --git a/libevent/evrpc.h b/libevent/evrpc.h
new file mode 100644
index 00000000000..7c16b95c775
--- /dev/null
+++ b/libevent/evrpc.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVRPC_H_
+#define _EVRPC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @file evrpc.h
+ *
+ * This header files provides basic support for an RPC server and client.
+ *
+ * To support RPCs in a server, every supported RPC command needs to be
+ * defined and registered.
+ *
+ * EVRPC_HEADER(SendCommand, Request, Reply);
+ *
+ * SendCommand is the name of the RPC command.
+ * Request is the name of a structure generated by event_rpcgen.py.
+ * It contains all parameters relating to the SendCommand RPC. The
+ * server needs to fill in the Reply structure.
+ * Reply is the name of a structure generated by event_rpcgen.py. It
+ * contains the answer to the RPC.
+ *
+ * To register an RPC with an HTTP server, you need to first create an RPC
+ * base with:
+ *
+ * struct evrpc_base *base = evrpc_init(http);
+ *
+ * A specific RPC can then be registered with
+ *
+ * EVRPC_REGISTER(base, SendCommand, Request, Reply, FunctionCB, arg);
+ *
+ * when the server receives an appropriately formatted RPC, the user callback
+ * is invokved. The callback needs to fill in the reply structure.
+ *
+ * void FunctionCB(EVRPC_STRUCT(SendCommand)* rpc, void *arg);
+ *
+ * To send the reply, call EVRPC_REQUEST_DONE(rpc);
+ *
+ * See the regression test for an example.
+ */
+
+struct evbuffer;
+struct event_base;
+struct evrpc_req_generic;
+
+/* Encapsulates a registered server-side RPC: the URI it is served
+ * under, the marshaling callbacks (generated by event_rpcgen.py) and
+ * the user callback that implements it. */
+struct evrpc {
+ TAILQ_ENTRY(evrpc) next;
+
+ /* the URI at which the request handler lives */
+ const char* uri;
+
+ /* creates a new request structure */
+ void *(*request_new)(void);
+
+ /* frees the request structure */
+ void (*request_free)(void *);
+
+ /* unmarshals the buffer into the proper request structure */
+ int (*request_unmarshal)(void *, struct evbuffer *);
+
+ /* creates a new reply structure */
+ void *(*reply_new)(void);
+
+ /* frees the reply structure */
+ void (*reply_free)(void *);
+
+ /* verifies that the reply is valid */
+ int (*reply_complete)(void *);
+
+ /* marshals the reply into a buffer */
+ void (*reply_marshal)(struct evbuffer*, void *);
+
+ /* the callback invoked for each received rpc */
+ void (*cb)(struct evrpc_req_generic *, void *);
+ void *cb_arg;
+
+ /* reference for further configuration */
+ struct evrpc_base *base;
+};
+
+/** The type of a specific RPC Message
+ *
+ * @param rpcname the name of the RPC message
+ */
+#define EVRPC_STRUCT(rpcname) struct evrpc_req__##rpcname
+
+struct evhttp_request;
+struct evrpc_status;
+
+/* We alias the RPC specific structs to this voided one; its layout
+ * must stay in sync with the struct emitted by EVRPC_HEADER() so the
+ * two views of a request can be used interchangeably. */
+struct evrpc_req_generic {
+ /* the unmarshaled request object */
+ void *request;
+
+ /* the empty reply object that needs to be filled in */
+ void *reply;
+
+ /*
+ * the static structure for this rpc; that can be used to
+ * automatically unmarshal and marshal the http buffers.
+ */
+ struct evrpc *rpc;
+
+ /*
+ * the http request structure on which we need to answer.
+ */
+ struct evhttp_request* http_req;
+
+ /*
+ * callback to reply and finish answering this rpc
+ */
+ void (*done)(struct evrpc_req_generic* rpc);
+};
+
+/** Creates the definitions and prototypes for an RPC
+ *
+ * You need to use EVRPC_HEADER to create structures and function prototypes
+ * needed by the server and client implementation. The structures have to be
+ * defined in an .rpc file and converted to source code via event_rpcgen.py
+ *
+ * @param rpcname the name of the RPC
+ * @param reqstruct the name of the RPC request structure
+ * @param rplystruct the name of the RPC reply structure
+ * @see EVRPC_GENERATE()
+ */
+#define EVRPC_HEADER(rpcname, reqstruct, rplystruct) \
+EVRPC_STRUCT(rpcname) { \
+ struct reqstruct* request; \
+ struct rplystruct* reply; \
+ struct evrpc* rpc; \
+ struct evhttp_request* http_req; \
+ void (*done)(struct evrpc_status *, \
+ struct evrpc* rpc, void *request, void *reply); \
+}; \
+int evrpc_send_request_##rpcname(struct evrpc_pool *, \
+ struct reqstruct *, struct rplystruct *, \
+ void (*)(struct evrpc_status *, \
+ struct reqstruct *, struct rplystruct *, void *cbarg), \
+ void *);
+
+/** Generates the code for receiving and sending an RPC message
+ *
+ * EVRPC_GENERATE is used to create the code corresponding to sending
+ * and receiving a particular RPC message
+ *
+ * On allocation failure the user callback is invoked immediately with
+ * EVRPC_STATUS_ERR_UNSTARTED and -1 is returned; otherwise the context
+ * is handed to evrpc_make_request().
+ *
+ * @param rpcname the name of the RPC
+ * @param reqstruct the name of the RPC request structure
+ * @param rplystruct the name of the RPC reply structure
+ * @see EVRPC_HEADER()
+ */
+#define EVRPC_GENERATE(rpcname, reqstruct, rplystruct) \
+int evrpc_send_request_##rpcname(struct evrpc_pool *pool, \
+ struct reqstruct *request, struct rplystruct *reply, \
+ void (*cb)(struct evrpc_status *, \
+ struct reqstruct *, struct rplystruct *, void *cbarg), \
+ void *cbarg) { \
+ struct evrpc_status status; \
+ struct evrpc_request_wrapper *ctx; \
+ ctx = (struct evrpc_request_wrapper *) \
+ malloc(sizeof(struct evrpc_request_wrapper)); \
+ if (ctx == NULL) \
+ goto error; \
+ ctx->pool = pool; \
+ ctx->evcon = NULL; \
+ ctx->name = strdup(#rpcname); \
+ if (ctx->name == NULL) { \
+ free(ctx); \
+ goto error; \
+ } \
+ ctx->cb = (void (*)(struct evrpc_status *, \
+ void *, void *, void *))cb; \
+ ctx->cb_arg = cbarg; \
+ ctx->request = (void *)request; \
+ ctx->reply = (void *)reply; \
+ ctx->request_marshal = (void (*)(struct evbuffer *, void *))reqstruct##_marshal; \
+ ctx->reply_clear = (void (*)(void *))rplystruct##_clear; \
+ ctx->reply_unmarshal = (int (*)(void *, struct evbuffer *))rplystruct##_unmarshal; \
+ return (evrpc_make_request(ctx)); \
+error: \
+ memset(&status, 0, sizeof(status)); \
+ status.error = EVRPC_STATUS_ERR_UNSTARTED; \
+ (*(cb))(&status, request, reply, cbarg); \
+ return (-1); \
+}
+
+/** Provides access to the HTTP request object underlying an RPC
+ *
+ * Access to the underlying http object; can be used to look at headers or
+ * for getting the remote ip address
+ *
+ * @param rpc_req the rpc request structure provided to the server callback
+ * @return an struct evhttp_request object that can be inspected for
+ * HTTP headers or sender information.
+ */
+#define EVRPC_REQUEST_HTTP(rpc_req) (rpc_req)->http_req
+
+/** Creates the reply to an RPC request
+ *
+ * EVRPC_REQUEST_DONE is used to answer a request; the reply is expected
+ * to have been filled in. The request and reply pointers become invalid
+ * after this call has finished.
+ *
+ * @param rpc_req the rpc request structure provided to the server callback
+ */
+#define EVRPC_REQUEST_DONE(rpc_req) do { \
+ struct evrpc_req_generic *_req = (struct evrpc_req_generic *)(rpc_req); \
+ _req->done(_req); \
+} while (0)
+
+
+/* Takes a request object and fills it in with the right magic */
+#define EVRPC_REGISTER_OBJECT(rpc, name, request, reply) \
+ do { \
+ (rpc)->uri = strdup(#name); \
+ if ((rpc)->uri == NULL) { \
+ fprintf(stderr, "failed to register object\n"); \
+ exit(1); \
+ } \
+ (rpc)->request_new = (void *(*)(void))request##_new; \
+ (rpc)->request_free = (void (*)(void *))request##_free; \
+ (rpc)->request_unmarshal = (int (*)(void *, struct evbuffer *))request##_unmarshal; \
+ (rpc)->reply_new = (void *(*)(void))reply##_new; \
+ (rpc)->reply_free = (void (*)(void *))reply##_free; \
+ (rpc)->reply_complete = (int (*)(void *))reply##_complete; \
+ (rpc)->reply_marshal = (void (*)(struct evbuffer*, void *))reply##_marshal; \
+ } while (0)
+
+struct evrpc_base;
+struct evhttp;
+
+/* functions to start up the rpc system */
+
+/** Creates a new rpc base from which RPC requests can be received
+ *
+ * @param server a pointer to an existing HTTP server
+ * @return a newly allocated evrpc_base struct
+ * @see evrpc_free()
+ */
+struct evrpc_base *evrpc_init(struct evhttp *server);
+
+/**
+ * Frees the evrpc base
+ *
+ * For now, you are responsible for making sure that no rpcs are ongoing.
+ *
+ * @param base the evrpc_base object to be freed
+ * @see evrpc_init
+ */
+void evrpc_free(struct evrpc_base *base);
+
+/** register RPCs with the HTTP Server
+ *
+ * registers a new RPC with the HTTP server, each RPC needs to have
+ * a unique name under which it can be identified.
+ *
+ * @param base the evrpc_base structure in which the RPC should be
+ * registered.
+ * @param name the name of the RPC
+ * @param request the name of the RPC request structure
+ * @param reply the name of the RPC reply structure
+ * @param callback the callback that should be invoked when the RPC
+ * is received. The callback has the following prototype
+ * void (*callback)(EVRPC_STRUCT(Message)* rpc, void *arg)
+ * @param cbarg an additional parameter that can be passed to the callback.
+ * The parameter can be used to carry around state.
+ */
+#define EVRPC_REGISTER(base, name, request, reply, callback, cbarg) \
+ do { \
+ struct evrpc* rpc = (struct evrpc *)calloc(1, sizeof(struct evrpc)); \
+ EVRPC_REGISTER_OBJECT(rpc, name, request, reply); \
+ evrpc_register_rpc(base, rpc, \
+ (void (*)(struct evrpc_req_generic*, void *))callback, cbarg); \
+ } while (0)
+
+int evrpc_register_rpc(struct evrpc_base *, struct evrpc *,
+ void (*)(struct evrpc_req_generic*, void *), void *);
+
+/**
+ * Unregisters an already registered RPC
+ *
+ * @param base the evrpc_base object from which to unregister an RPC
+ * @param name the name of the rpc to unregister
+ * @return -1 on error or 0 when successful.
+ * @see EVRPC_REGISTER()
+ */
+#define EVRPC_UNREGISTER(base, name) evrpc_unregister_rpc(base, #name)
+
+int evrpc_unregister_rpc(struct evrpc_base *base, const char *name);
+
+/*
+ * Client-side RPC support
+ */
+
+struct evrpc_pool;
+struct evhttp_connection;
+
+/**
+ * provides information about the completed RPC request.
+ */
+struct evrpc_status {
+#define EVRPC_STATUS_ERR_NONE 0
+#define EVRPC_STATUS_ERR_TIMEOUT 1
+#define EVRPC_STATUS_ERR_BADPAYLOAD 2
+#define EVRPC_STATUS_ERR_UNSTARTED 3
+#define EVRPC_STATUS_ERR_HOOKABORTED 4
+ int error;
+
+ /* for looking at headers or other information */
+ struct evhttp_request *http_req;
+};
+
+/* Client-side state for one queued or in-flight RPC request. */
+struct evrpc_request_wrapper {
+ TAILQ_ENTRY(evrpc_request_wrapper) next;
+
+ /* pool on which this rpc request is being made */
+ struct evrpc_pool *pool;
+
+ /* connection on which the request is being sent */
+ struct evhttp_connection *evcon;
+
+ /* event for implementing request timeouts */
+ struct event ev_timeout;
+
+ /* the name of the rpc */
+ char *name;
+
+ /* callback */
+ void (*cb)(struct evrpc_status*, void *request, void *reply, void *arg);
+ void *cb_arg;
+
+ void *request;
+ void *reply;
+
+ /* marshals the request structure into a buffer */
+ void (*request_marshal)(struct evbuffer *, void *);
+
+ /* removes all stored state in the reply */
+ void (*reply_clear)(void *);
+
+ /* unmarshals the buffer into the proper reply structure */
+ int (*reply_unmarshal)(void *, struct evbuffer*);
+};
+
+/** launches an RPC and sends it to the server
+ *
+ * EVRPC_MAKE_REQUEST() is used by the client to send an RPC to the server.
+ *
+ * @param name the name of the RPC
+ * @param pool the evrpc_pool that contains the connection objects over which
+ * the request should be sent.
+ * @param request a pointer to the RPC request structure - it contains the
+ * data to be sent to the server.
+ * @param reply a pointer to the RPC reply structure. It is going to be filled
+ * if the request was answered successfully
+ * @param cb the callback to invoke when the RPC request has been answered
+ * @param cbarg an additional argument to be passed to the client
+ * @return 0 on success, -1 on failure
+ */
+#define EVRPC_MAKE_REQUEST(name, pool, request, reply, cb, cbarg) \
+ evrpc_send_request_##name(pool, request, reply, cb, cbarg)
+
+int evrpc_make_request(struct evrpc_request_wrapper *);
+
+/** creates an rpc connection pool
+ *
+ * a pool has a number of connections associated with it.
+ * rpc requests are always made via a pool.
+ *
+ * @param base a pointer to an struct event_based object; can be left NULL
+ * in singled-threaded applications
+ * @return a newly allocated struct evrpc_pool object
+ * @see evrpc_pool_free()
+ */
+struct evrpc_pool *evrpc_pool_new(struct event_base *base);
+/** frees an rpc connection pool
+ *
+ * @param pool a pointer to an evrpc_pool allocated via evrpc_pool_new()
+ * @see evrpc_pool_new()
+ */
+void evrpc_pool_free(struct evrpc_pool *pool);
+/*
+ * adds a connection over which rpc can be dispatched. the connection
+ * object must have been newly created.
+ */
+void evrpc_pool_add_connection(struct evrpc_pool *,
+ struct evhttp_connection *);
+
+/**
+ * Sets the timeout in secs after which a request has to complete. The
+ * RPC is completely aborted if it does not complete by then. Setting
+ * the timeout to 0 means that it never timeouts and can be used to
+ * implement callback type RPCs.
+ *
+ * Any connection already in the pool will be updated with the new
+ * timeout. Connections added to the pool after set_timeout has be
+ * called receive the pool timeout only if no timeout has been set
+ * for the connection itself.
+ *
+ * @param pool a pointer to a struct evrpc_pool object
+ * @param timeout_in_secs the number of seconds after which a request should
+ * timeout and a failure be returned to the callback.
+ */
+void evrpc_pool_set_timeout(struct evrpc_pool *pool, int timeout_in_secs);
+
+/**
+ * Hooks for changing the input and output of RPCs; this can be used to
+ * implement compression, authentication, encryption, ...
+ */
+
+enum EVRPC_HOOK_TYPE {
+ EVRPC_INPUT, /**< apply the function to an input hook */
+ EVRPC_OUTPUT /**< apply the function to an output hook */
+};
+
+#ifndef WIN32
+/** Deprecated alias for EVRPC_INPUT. Not available on windows, where it
+ * conflicts with platform headers. */
+#define INPUT EVRPC_INPUT
+/** Deprecated alias for EVRPC_OUTPUT. Not available on windows, where it
+ * conflicts with platform headers. */
+#define OUTPUT EVRPC_OUTPUT
+#endif
+
+/** adds a processing hook to either an rpc base or rpc pool
+ *
+ * If a hook returns -1, the processing is aborted.
+ *
+ * The add functions return handles that can be used for removing hooks.
+ *
+ * @param vbase a pointer to either struct evrpc_base or struct evrpc_pool
+ * @param hook_type either INPUT or OUTPUT
+ * @param cb the callback to call when the hook is activated
+ * @param cb_arg an additional argument for the callback
+ * @return a handle to the hook so it can be removed later
+ * @see evrpc_remove_hook()
+ */
+void *evrpc_add_hook(void *vbase,
+ enum EVRPC_HOOK_TYPE hook_type,
+ int (*cb)(struct evhttp_request *, struct evbuffer *, void *),
+ void *cb_arg);
+
+/** removes a previously added hook
+ *
+ * @param vbase a pointer to either struct evrpc_base or struct evrpc_pool
+ * @param hook_type either INPUT or OUTPUT
+ * @param handle a handle returned by evrpc_add_hook()
+ * @return 1 on success or 0 on failure
+ * @see evrpc_add_hook()
+ */
+int evrpc_remove_hook(void *vbase,
+ enum EVRPC_HOOK_TYPE hook_type,
+ void *handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVRPC_H_ */
diff --git a/libevent/evsignal.h b/libevent/evsignal.h
new file mode 100644
index 00000000000..9b0405eea09
--- /dev/null
+++ b/libevent/evsignal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVSIGNAL_H_
+#define _EVSIGNAL_H_
+
+#include <signal.h>
+
+/* plain signal(2)-style handler function pointer */
+typedef void (*ev_sighandler_t)(int);
+
+/* Per-event-base bookkeeping for signal handling.  evsignal_caught and
+ * evsigcaught are sig_atomic_t because they are written from signal
+ * handler context (see the implementation in signal.c). */
+struct evsignal_info {
+ struct event ev_signal;
+ /* NOTE(review): presumably a self-pipe used to wake the event
+  * loop from the signal handler — confirm against signal.c */
+ int ev_signal_pair[2];
+ int ev_signal_added;
+ volatile sig_atomic_t evsignal_caught;
+ struct event_list evsigevents[NSIG];
+ sig_atomic_t evsigcaught[NSIG];
+ /* previously installed handlers, indexed by signal number;
+  * sh_old_max tracks the array size */
+#ifdef HAVE_SIGACTION
+ struct sigaction **sh_old;
+#else
+ ev_sighandler_t **sh_old;
+#endif
+ int sh_old_max;
+};
+/* internal signal-handling entry points used by the event core */
+int evsignal_init(struct event_base *);
+void evsignal_process(struct event_base *);
+int evsignal_add(struct event *);
+int evsignal_del(struct event *);
+void evsignal_dealloc(struct event_base *);
+
+#endif /* _EVSIGNAL_H_ */
diff --git a/libevent/evutil.c b/libevent/evutil.c
new file mode 100644
index 00000000000..7d22d3eac16
--- /dev/null
+++ b/libevent/evutil.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <errno.h>
+#if defined WIN32 && !defined(HAVE_GETTIMEOFDAY_H)
+#include <sys/timeb.h>
+#endif
+#include <stdio.h>
+
+#include "evutil.h"
+#include "log.h"
+
+/*
+ * Portable socketpair(): on Unix simply forwards to socketpair(2); on
+ * win32 emulates it by connecting two TCP sockets through a loopback
+ * listener (code originally from Tor, used with permission).
+ *
+ * @param family must be AF_UNIX (where defined) on the win32 path
+ * @param type the socket type, e.g. SOCK_STREAM
+ * @param protocol must be 0 on the win32 path
+ * @param fd output array receiving the two connected descriptors
+ * @return 0 on success, -1 on failure (socket error code set)
+ */
+int
+evutil_socketpair(int family, int type, int protocol, int fd[2])
+{
+#ifndef WIN32
+ return socketpair(family, type, protocol, fd);
+#else
+ /* This code is originally from Tor. Used with permission. */
+
+ /* This socketpair does not work when localhost is down. So
+ * it's really not the same thing at all. But it's close enough
+ * for now, and really, when localhost is down sometimes, we
+ * have other problems too.
+ */
+ int listener = -1;
+ int connector = -1;
+ int acceptor = -1;
+ struct sockaddr_in listen_addr;
+ struct sockaddr_in connect_addr;
+ /* 'int' (not socklen_t) matches the win32 getsockname/accept
+  * prototypes; this branch is win32-only */
+ int size;
+ int saved_errno = -1; /* -1 means "no error captured yet" */
+
+ /* reject arguments the loopback emulation cannot honour */
+ if (protocol
+#ifdef AF_UNIX
+ || family != AF_UNIX
+#endif
+ ) {
+ EVUTIL_SET_SOCKET_ERROR(WSAEAFNOSUPPORT);
+ return -1;
+ }
+ if (!fd) {
+ EVUTIL_SET_SOCKET_ERROR(WSAEINVAL);
+ return -1;
+ }
+
+ listener = socket(AF_INET, type, 0);
+ if (listener < 0)
+ return -1;
+ memset(&listen_addr, 0, sizeof(listen_addr));
+ listen_addr.sin_family = AF_INET;
+ listen_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ listen_addr.sin_port = 0; /* kernel chooses port. */
+ if (bind(listener, (struct sockaddr *) &listen_addr, sizeof (listen_addr))
+ == -1)
+ goto tidy_up_and_fail;
+ if (listen(listener, 1) == -1)
+ goto tidy_up_and_fail;
+
+ connector = socket(AF_INET, type, 0);
+ if (connector < 0)
+ goto tidy_up_and_fail;
+ /* We want to find out the port number to connect to. */
+ size = sizeof(connect_addr);
+ if (getsockname(listener, (struct sockaddr *) &connect_addr, &size) == -1)
+ goto tidy_up_and_fail;
+ if (size != sizeof (connect_addr))
+ goto abort_tidy_up_and_fail;
+ if (connect(connector, (struct sockaddr *) &connect_addr,
+ sizeof(connect_addr)) == -1)
+ goto tidy_up_and_fail;
+
+ size = sizeof(listen_addr);
+ acceptor = accept(listener, (struct sockaddr *) &listen_addr, &size);
+ if (acceptor < 0)
+ goto tidy_up_and_fail;
+ if (size != sizeof(listen_addr))
+ goto abort_tidy_up_and_fail;
+ EVUTIL_CLOSESOCKET(listener);
+ /* Now check we are talking to ourself by matching port and host on the
+ two sockets. */
+ if (getsockname(connector, (struct sockaddr *) &connect_addr, &size) == -1)
+ goto tidy_up_and_fail;
+ if (size != sizeof (connect_addr)
+ || listen_addr.sin_family != connect_addr.sin_family
+ || listen_addr.sin_addr.s_addr != connect_addr.sin_addr.s_addr
+ || listen_addr.sin_port != connect_addr.sin_port)
+ goto abort_tidy_up_and_fail;
+ fd[0] = connector;
+ fd[1] = acceptor;
+
+ return 0;
+
+ abort_tidy_up_and_fail:
+ saved_errno = WSAECONNABORTED;
+ tidy_up_and_fail:
+ if (saved_errno < 0)
+ saved_errno = WSAGetLastError();
+ if (listener != -1)
+ EVUTIL_CLOSESOCKET(listener);
+ if (connector != -1)
+ EVUTIL_CLOSESOCKET(connector);
+ if (acceptor != -1)
+ EVUTIL_CLOSESOCKET(acceptor);
+
+ EVUTIL_SET_SOCKET_ERROR(saved_errno);
+ return -1;
+#endif
+}
+
+/*
+ * Puts a descriptor (socket on win32) into non-blocking mode.
+ *
+ * @param fd the descriptor to modify
+ * @return 0 on success, -1 on failure
+ */
+int
+evutil_make_socket_nonblocking(int fd)
+{
+#ifdef WIN32
+ {
+ unsigned long nonblocking = 1;
+ if (ioctlsocket(fd, FIONBIO, &nonblocking) == SOCKET_ERROR) {
+ event_warn("ioctlsocket(FIONBIO)");
+ return -1;
+ }
+ }
+#else
+ {
+ int flags;
+ /* fetch the current flags first so we do not clobber bits
+  * such as O_APPEND that may already be set on the fd */
+ if ((flags = fcntl(fd, F_GETFL, NULL)) == -1) {
+ event_warn("fcntl(F_GETFL)");
+ return -1;
+ }
+ if (!(flags & O_NONBLOCK)) {
+ if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+ event_warn("fcntl(F_SETFL O_NONBLOCK)");
+ return -1;
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Portable strtoll() replacement: uses strtoll() where available,
+ * strtol() on LP64 systems, and the _atoi64/_strtoi64 family on MSVC.
+ *
+ * On MSVC older than 13.00 only base 10 is supported (returns 0
+ * otherwise) and *endptr is approximated by skipping whitespace and
+ * digits only — no sign or prefix handling on that path.
+ */
+ev_int64_t
+evutil_strtoll(const char *s, char **endptr, int base)
+{
+#ifdef HAVE_STRTOLL
+ return (ev_int64_t)strtoll(s, endptr, base);
+#elif SIZEOF_LONG == 8
+ return (ev_int64_t)strtol(s, endptr, base);
+#elif defined(WIN32) && defined(_MSC_VER) && _MSC_VER < 1300
+ /* XXXX on old versions of MS APIs, we only support base
+ * 10. */
+ ev_int64_t r;
+ if (base != 10)
+ return 0;
+ r = (ev_int64_t) _atoi64(s);
+ while (isspace(*s))
+ ++s;
+ while (isdigit(*s))
+ ++s;
+ if (endptr)
+ *endptr = (char*) s;
+ return r;
+#elif defined(WIN32)
+ return (ev_int64_t) _strtoi64(s, endptr, base);
+#else
+#error "I don't know how to parse 64-bit integers."
+#endif
+}
+
+#ifndef HAVE_GETTIMEOFDAY
+/*
+ * gettimeofday() replacement built on _ftime() (win32).  Millisecond
+ * resolution only; the tz argument is ignored.
+ *
+ * @return 0 on success, -1 if tv is NULL
+ */
+int
+evutil_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ struct _timeb tb;
+
+ if(tv == NULL)
+ return -1;
+
+ _ftime(&tb);
+ tv->tv_sec = (long) tb.time;
+ tv->tv_usec = ((int) tb.millitm) * 1000;
+ return 0;
+}
+#endif
+
+/*
+ * Portable snprintf(); collects the variadic arguments and delegates
+ * to evutil_vsnprintf(), which hides the platform differences.
+ */
+int
+evutil_snprintf(char *buf, size_t buflen, const char *format, ...)
+{
+ int r;
+ va_list ap;
+ va_start(ap, format);
+ r = evutil_vsnprintf(buf, buflen, format, ap);
+ va_end(ap);
+ return r;
+}
+
+/*
+ * Portable vsnprintf() replacement that always NUL-terminates the
+ * output when buflen > 0 and reports the full formatted length even on
+ * MSVC (whose _vsnprintf returns -1 on truncation).
+ *
+ * @param buf destination buffer
+ * @param buflen size of buf in bytes; may be 0 (nothing is written)
+ * @param format printf-style format string
+ * @param ap argument list
+ * @return the number of characters that would have been written had
+ * the buffer been large enough, as per C99 vsnprintf
+ */
+int
+evutil_vsnprintf(char *buf, size_t buflen, const char *format, va_list ap)
+{
+ int r;
+#ifdef _MSC_VER
+ r = _vsnprintf(buf, buflen, format, ap);
+ if (r < 0)
+ /* truncated: recover the required length for C99 semantics */
+ r = _vscprintf(format, ap);
+#else
+ r = vsnprintf(buf, buflen, format, ap);
+#endif
+ /* guard buflen == 0: buf[buflen-1] would index buf[SIZE_MAX] */
+ if (buflen > 0)
+ buf[buflen-1] = '\0';
+ return r;
+}
diff --git a/libevent/evutil.h b/libevent/evutil.h
new file mode 100644
index 00000000000..ea751ddf7b7
--- /dev/null
+++ b/libevent/evutil.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVUTIL_H_
+#define _EVUTIL_H_
+
+/** @file evutil.h
+
+ Common convenience functions for cross-platform portability and
+ related socket manipulations.
+
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <config.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#elif defined(HAVE_INTTYPES_H)
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#include <stdarg.h>
+
+#ifdef HAVE_UINT64_T
+#define ev_uint64_t uint64_t
+#define ev_int64_t int64_t
+#elif defined(WIN32)
+#define ev_uint64_t unsigned __int64
+#define ev_int64_t signed __int64
+#elif SIZEOF_LONG_LONG == 8
+#define ev_uint64_t unsigned long long
+#define ev_int64_t long long
+#elif SIZEOF_LONG == 8
+#define ev_uint64_t unsigned long
+#define ev_int64_t long
+#else
+#error "No way to define ev_uint64_t"
+#endif
+
+#ifdef HAVE_UINT32_T
+#define ev_uint32_t uint32_t
+#elif defined(WIN32)
+#define ev_uint32_t unsigned int
+#elif SIZEOF_LONG == 4
+#define ev_uint32_t unsigned long
+#elif SIZEOF_INT == 4
+#define ev_uint32_t unsigned int
+#else
+#error "No way to define ev_uint32_t"
+#endif
+
+#ifdef HAVE_UINT16_T
+#define ev_uint16_t uint16_t
+#elif defined(WIN32)
+#define ev_uint16_t unsigned short
+#elif SIZEOF_INT == 2
+#define ev_uint16_t unsigned int
+#elif SIZEOF_SHORT == 2
+#define ev_uint16_t unsigned short
+#else
+#error "No way to define ev_uint16_t"
+#endif
+
+#ifdef HAVE_UINT8_T
+#define ev_uint8_t uint8_t
+#else
+#define ev_uint8_t unsigned char
+#endif
+
+int evutil_socketpair(int d, int type, int protocol, int sv[2]);
+int evutil_make_socket_nonblocking(int sock);
+#ifdef WIN32
+#define EVUTIL_CLOSESOCKET(s) closesocket(s)
+#else
+#define EVUTIL_CLOSESOCKET(s) close(s)
+#endif
+
+#ifdef WIN32
+#define EVUTIL_SOCKET_ERROR() WSAGetLastError()
+#define EVUTIL_SET_SOCKET_ERROR(errcode) \
+ do { WSASetLastError(errcode); } while (0)
+#else
+#define EVUTIL_SOCKET_ERROR() (errno)
+#define EVUTIL_SET_SOCKET_ERROR(errcode) \
+ do { errno = (errcode); } while (0)
+#endif
+
+/*
+ * Manipulation functions for struct timeval
+ */
+#ifdef HAVE_TIMERADD
+#define evutil_timeradd(tvp, uvp, vvp) timeradd((tvp), (uvp), (vvp))
+#define evutil_timersub(tvp, uvp, vvp) timersub((tvp), (uvp), (vvp))
+#else
+/* vvp = tvp + uvp, normalizing tv_usec back into [0, 1000000) */
+#define evutil_timeradd(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \
+ if ((vvp)->tv_usec >= 1000000) { \
+ (vvp)->tv_sec++; \
+ (vvp)->tv_usec -= 1000000; \
+ } \
+ } while (0)
+/* vvp = tvp - uvp; a single borrow suffices when inputs are normalized */
+#define evutil_timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif /* !HAVE_TIMERADD */
+
+#ifdef HAVE_TIMERCLEAR
+#define evutil_timerclear(tvp) timerclear(tvp)
+#else
+#define evutil_timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0
+#endif
+
+#define evutil_timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+#ifdef HAVE_TIMERISSET
+#define evutil_timerisset(tvp) timerisset(tvp)
+#else
+#define evutil_timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
+#endif
+
+
+/* big-int related functions */
+ev_int64_t evutil_strtoll(const char *s, char **endptr, int base);
+
+
+#ifdef HAVE_GETTIMEOFDAY
+#define evutil_gettimeofday(tv, tz) gettimeofday((tv), (tz))
+#else
+int evutil_gettimeofday(struct timeval *tv, struct timezone *tz);
+#endif
+
+int evutil_snprintf(char *buf, size_t buflen, const char *format, ...)
+#ifdef __GNUC__
+ __attribute__((format(printf, 3, 4)))
+#endif
+ ;
+int evutil_vsnprintf(char *buf, size_t buflen, const char *format, va_list ap);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVUTIL_H_ */
diff --git a/libevent/http-internal.h b/libevent/http-internal.h
new file mode 100644
index 00000000000..9cd03cdd2bc
--- /dev/null
+++ b/libevent/http-internal.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * This header file contains definitions for dealing with HTTP requests
+ * that are internal to libevent. As user of the library, you should not
+ * need to know about these.
+ */
+
+#ifndef _HTTP_H_
+#define _HTTP_H_
+
+#define HTTP_CONNECT_TIMEOUT 45
+#define HTTP_WRITE_TIMEOUT 50
+#define HTTP_READ_TIMEOUT 50
+
+#define HTTP_PREFIX "http://"
+#define HTTP_DEFAULTPORT 80
+
+enum message_read_status {
+ ALL_DATA_READ = 1,
+ MORE_DATA_EXPECTED = 0,
+ DATA_CORRUPTED = -1,
+ REQUEST_CANCELED = -2
+};
+
+enum evhttp_connection_error {
+ EVCON_HTTP_TIMEOUT,
+ EVCON_HTTP_EOF,
+ EVCON_HTTP_INVALID_HEADER
+};
+
+struct evbuffer;
+struct addrinfo;
+struct evhttp_request;
+
+/* A stupid connection object - maybe make this a bufferevent later */
+
+enum evhttp_connection_state {
+ EVCON_DISCONNECTED, /**< not currently connected not trying either*/
+ EVCON_CONNECTING, /**< tries to currently connect */
+ EVCON_IDLE, /**< connection is established */
+ EVCON_READING_FIRSTLINE,/**< reading Request-Line (incoming conn) or
+ **< Status-Line (outgoing conn) */
+ EVCON_READING_HEADERS, /**< reading request/response headers */
+ EVCON_READING_BODY, /**< reading request/response body */
+ EVCON_READING_TRAILER, /**< reading request/response chunked trailer */
+ EVCON_WRITING /**< writing request/response headers/body */
+};
+
+struct event_base;
+
+struct evhttp_connection {
+ /* we use tailq only if they were created for an http server */
+ TAILQ_ENTRY(evhttp_connection) (next);
+
+ int fd;
+ struct event ev;
+ struct event close_ev;
+ struct evbuffer *input_buffer;
+ struct evbuffer *output_buffer;
+
+ char *bind_address; /* address to use for binding the src */
+ u_short bind_port; /* local port for binding the src */
+
+ char *address; /* address to connect to */
+ u_short port;
+
+ int flags;
+#define EVHTTP_CON_INCOMING 0x0001 /* only one request on it ever */
+#define EVHTTP_CON_OUTGOING 0x0002 /* multiple requests possible */
+#define EVHTTP_CON_CLOSEDETECT 0x0004 /* detecting if persistent close */
+
+ int timeout; /* timeout in seconds for events */
+ int retry_cnt; /* retry count */
+ int retry_max; /* maximum number of retries */
+
+ enum evhttp_connection_state state;
+
+ /* for server connections, the http server they are connected with */
+ struct evhttp *http_server;
+
+ TAILQ_HEAD(evcon_requestq, evhttp_request) requests;
+
+ void (*cb)(struct evhttp_connection *, void *);
+ void *cb_arg;
+
+ void (*closecb)(struct evhttp_connection *, void *);
+ void *closecb_arg;
+
+ struct event_base *base;
+};
+
+struct evhttp_cb {
+ TAILQ_ENTRY(evhttp_cb) next;
+
+ char *what;
+
+ void (*cb)(struct evhttp_request *req, void *);
+ void *cbarg;
+};
+
+/* both the http server as well as the rpc system need to queue connections */
+TAILQ_HEAD(evconq, evhttp_connection);
+
+/* each bound socket is stored in one of these */
+struct evhttp_bound_socket {
+ TAILQ_ENTRY(evhttp_bound_socket) (next);
+
+ struct event bind_ev;
+};
+
+struct evhttp {
+ TAILQ_HEAD(boundq, evhttp_bound_socket) sockets;
+
+ TAILQ_HEAD(httpcbq, evhttp_cb) callbacks;
+ struct evconq connections;
+
+ int timeout;
+
+ void (*gencb)(struct evhttp_request *req, void *);
+ void *gencbarg;
+
+ struct event_base *base;
+};
+
+/* resets the connection; can be reused for more requests */
+void evhttp_connection_reset(struct evhttp_connection *);
+
+/* connects if necessary */
+int evhttp_connection_connect(struct evhttp_connection *);
+
+/* notifies the current request that it failed; resets connection */
+void evhttp_connection_fail(struct evhttp_connection *,
+ enum evhttp_connection_error error);
+
+void evhttp_get_request(struct evhttp *, int, struct sockaddr *, socklen_t);
+
+int evhttp_hostportfile(char *, char **, u_short *, char **);
+
+int evhttp_parse_firstline(struct evhttp_request *, struct evbuffer*);
+int evhttp_parse_headers(struct evhttp_request *, struct evbuffer*);
+
+void evhttp_start_read(struct evhttp_connection *);
+void evhttp_make_header(struct evhttp_connection *, struct evhttp_request *);
+
+void evhttp_write_buffer(struct evhttp_connection *,
+ void (*)(struct evhttp_connection *, void *), void *);
+
+/* send an HTML response containing the data in the buffer */
+void evhttp_response_code(struct evhttp_request *, int, const char *);
+void evhttp_send_page(struct evhttp_request *, struct evbuffer *);
+
+#endif /* _HTTP_H_ */
diff --git a/libevent/http.c b/libevent/http.c
new file mode 100644
index 00000000000..871bc2e4d0c
--- /dev/null
+++ b/libevent/http.c
@@ -0,0 +1,2830 @@
+/*
+ * Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_IOCCOM_H
+#include <sys/ioccom.h>
+#endif
+
+#ifndef WIN32
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#endif
+
+#include <sys/queue.h>
+
+#ifndef HAVE_TAILQFOREACH
+#include <event-internal.h>
+#endif
+
+#ifndef WIN32
+#include <netinet/in.h>
+#include <netdb.h>
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef WIN32
+#include <syslog.h>
+#endif
+#include <signal.h>
+#include <time.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#undef timeout_pending
+#undef timeout_initialized
+
+#include "strlcpy-internal.h"
+#include "event.h"
+#include "evhttp.h"
+#include "evutil.h"
+#include "log.h"
+#include "http-internal.h"
+
+#ifdef WIN32
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define strdup _strdup
+#endif
+
+#ifndef HAVE_GETNAMEINFO
+#define NI_MAXSERV 32
+#define NI_MAXHOST 1025
+
+#define NI_NUMERICHOST 1
+#define NI_NUMERICSERV 2
+
+static int
+fake_getnameinfo(const struct sockaddr *sa, size_t salen, char *host,
+	size_t hostlen, char *serv, size_t servlen, int flags)
+{
+	/*
+	 * Minimal getnameinfo() replacement for platforms that lack it.
+	 * Supports AF_INET only: sa is cast unconditionally to sockaddr_in.
+	 * Returns 0 on success, -1 on truncation, -2 on failed lookup.
+	 */
+	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+	if (serv != NULL) {
+		/* Service is always rendered numerically (the port). */
+		char tmpserv[16];
+		evutil_snprintf(tmpserv, sizeof(tmpserv),
+		    "%d", ntohs(sin->sin_port));
+		if (strlcpy(serv, tmpserv, servlen) >= servlen)
+			return (-1);
+	}
+
+	if (host != NULL) {
+		if (flags & NI_NUMERICHOST) {
+			/* Dotted-quad text form; no reverse lookup. */
+			if (strlcpy(host, inet_ntoa(sin->sin_addr),
+			    hostlen) >= hostlen)
+				return (-1);
+			else
+				return (0);
+		} else {
+			/* Blocking reverse DNS lookup. */
+			struct hostent *hp;
+			hp = gethostbyaddr((char *)&sin->sin_addr,
+			    sizeof(struct in_addr), AF_INET);
+			if (hp == NULL)
+				return (-2);
+
+			if (strlcpy(host, hp->h_name, hostlen) >= hostlen)
+				return (-1);
+			else
+				return (0);
+		}
+	}
+	return (0);
+}
+
+#endif
+
+#ifndef HAVE_GETADDRINFO
+struct addrinfo {
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t ai_addrlen;
+ struct sockaddr *ai_addr;
+ struct addrinfo *ai_next;
+};
+static int
+fake_getaddrinfo(const char *hostname, struct addrinfo *ai)
+{
+	/*
+	 * Minimal getaddrinfo() replacement: resolves hostname (or binds
+	 * to INADDR_ANY when hostname is NULL) into a single IPv4
+	 * sockaddr_in stored in the caller-provided ai.  Returns 0 on
+	 * success, -1 on lookup or allocation failure.  The caller must
+	 * release ai->ai_addr via fake_freeaddrinfo().
+	 */
+	struct hostent *he = NULL;
+	struct sockaddr_in *sa;
+	if (hostname) {
+		he = gethostbyname(hostname);
+		if (!he)
+			return (-1);
+	}
+	ai->ai_family = he ? he->h_addrtype : AF_INET;
+	ai->ai_socktype = SOCK_STREAM;
+	ai->ai_protocol = 0;
+	ai->ai_addrlen = sizeof(struct sockaddr_in);
+	if (NULL == (ai->ai_addr = malloc(ai->ai_addrlen)))
+		return (-1);
+	sa = (struct sockaddr_in*)ai->ai_addr;
+	memset(sa, 0, ai->ai_addrlen);
+	if (he) {
+		sa->sin_family = he->h_addrtype;
+		/* NOTE(review): assumes h_length <= sizeof(sa->sin_addr),
+		 * i.e. an IPv4 result — verify for AF_INET6 resolvers. */
+		memcpy(&sa->sin_addr, he->h_addr_list[0], he->h_length);
+	} else {
+		sa->sin_family = AF_INET;
+		sa->sin_addr.s_addr = INADDR_ANY;
+	}
+	ai->ai_next = NULL;
+	return (0);
+}
+static void
+fake_freeaddrinfo(struct addrinfo *ai)
+{
+ free(ai->ai_addr);
+}
+#endif
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+/* wrapper for setting the base from the http server */
+#define EVHTTP_BASE_SET(x, y) do { \
+ if ((x)->base != NULL) event_base_set((x)->base, y); \
+} while (0)
+
+extern int debug;
+
+static int socket_connect(int fd, const char *address, unsigned short port);
+static int bind_socket_ai(struct addrinfo *, int reuse);
+static int bind_socket(const char *, u_short, int reuse);
+static void name_from_addr(struct sockaddr *, socklen_t, char **, char **);
+static int evhttp_associate_new_request_with_connection(
+ struct evhttp_connection *evcon);
+static void evhttp_connection_start_detectclose(
+ struct evhttp_connection *evcon);
+static void evhttp_connection_stop_detectclose(
+ struct evhttp_connection *evcon);
+static void evhttp_request_dispatch(struct evhttp_connection* evcon);
+static void evhttp_read_firstline(struct evhttp_connection *evcon,
+ struct evhttp_request *req);
+static void evhttp_read_header(struct evhttp_connection *evcon,
+ struct evhttp_request *req);
+static int evhttp_add_header_internal(struct evkeyvalq *headers,
+ const char *key, const char *value);
+static int evhttp_decode_uri_internal(const char *uri, size_t length,
+ char *ret, int always_decode_plus);
+
+void evhttp_read(int, short, void *);
+void evhttp_write(int, short, void *);
+
+#ifndef HAVE_STRSEP
+/* strsep replacement for platforms that lack it. Only works if
+ * del is one character long. */
+static char *
+strsep(char **s, const char *del)
+{
+	/*
+	 * Returns the token at *s terminated at the first occurrence of
+	 * del, and advances *s past it; sets *s to NULL when del is not
+	 * found.  Asserts that del is exactly one character long.
+	 */
+	char *d, *tok;
+	assert(strlen(del) == 1);
+	if (!s || !*s)
+		return NULL;
+	tok = *s;
+	d = strstr(tok, del);
+	if (d) {
+		*d = '\0';
+		*s = d + 1;
+	} else
+		*s = NULL;
+	return tok;
+}
+#endif
+
<file_sep>+static const char *
+html_replace(char ch, char *buf)
+{
+	/*
+	 * Maps an HTML-special character to its entity string.  For any
+	 * other character, echoes it back via buf, which must hold at
+	 * least 2 bytes (char + NUL) supplied by the caller.
+	 */
+	switch (ch) {
+	case '<':
+		return "&lt;";
+	case '>':
+		return "&gt;";
+	case '"':
+		return "&quot;";
+	case '\'':
+		return "&#039;";
+	case '&':
+		return "&amp;";
+	default:
+		break;
+	}
+
+	/* Echo the character back */
+	buf[0] = ch;
+	buf[1] = '\0';
+
+	return buf;
+}
+
+/*
+ * Replaces <, >, ", ' and & with &lt;, &gt;, &quot;,
+ * &#039; and &amp; correspondingly.
+ *
+ * The returned string needs to be freed by the caller.
+ */
+
+char *
+evhttp_htmlescape(const char *html)
+{
+	/*
+	 * Two passes: first compute the escaped length, then fill the
+	 * freshly allocated buffer.  On malloc failure event_err() is
+	 * fatal, so the return value is always valid.
+	 */
+	int i, new_size = 0, old_size = strlen(html);
+	char *escaped_html, *p;
+	char scratch_space[2];
+
+	for (i = 0; i < old_size; ++i)
+		new_size += strlen(html_replace(html[i], scratch_space));
+
+	p = escaped_html = malloc(new_size + 1);
+	if (escaped_html == NULL)
+		event_err(1, "%s: malloc(%d)", __func__, new_size + 1);
+	for (i = 0; i < old_size; ++i) {
+		const char *replaced = html_replace(html[i], scratch_space);
+		/* this is length checked */
+		strcpy(p, replaced);
+		p += strlen(replaced);
+	}
+
+	*p = '\0';
+
+	return (escaped_html);
+}
+
+static const char *
+evhttp_method(enum evhttp_cmd_type type)
+{
+	/* Maps a request type to its HTTP method string; NULL if unknown. */
+	const char *method;
+
+	switch (type) {
+	case EVHTTP_REQ_GET:
+		method = "GET";
+		break;
+	case EVHTTP_REQ_POST:
+		method = "POST";
+		break;
+	case EVHTTP_REQ_HEAD:
+		method = "HEAD";
+		break;
+	default:
+		method = NULL;
+		break;
+	}
+
+	return (method);
+}
+
+static void
+evhttp_add_event(struct event *ev, int timeout, int default_timeout)
+{
+	/*
+	 * Adds ev with a timeout: 0 means no timeout, -1 means use
+	 * default_timeout, any other value is taken in seconds.
+	 */
+	if (timeout != 0) {
+		struct timeval tv;
+
+		evutil_timerclear(&tv);
+		tv.tv_sec = timeout != -1 ? timeout : default_timeout;
+		event_add(ev, &tv);
+	} else {
+		event_add(ev, NULL);
+	}
+}
+
+void
+evhttp_write_buffer(struct evhttp_connection *evcon,
+ void (*cb)(struct evhttp_connection *, void *), void *arg)
+{
+ event_debug(("%s: preparing to write buffer\n", __func__));
+
+ /* Set call back */
+ evcon->cb = cb;
+ evcon->cb_arg = arg;
+
+ /* check if the event is already pending */
+ if (event_pending(&evcon->ev, EV_WRITE|EV_TIMEOUT, NULL))
+ event_del(&evcon->ev);
+
+ event_set(&evcon->ev, evcon->fd, EV_WRITE, evhttp_write, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_WRITE_TIMEOUT);
+}
+
+static int
+evhttp_connected(struct evhttp_connection *evcon)
+{
+	/* Returns 1 once the TCP connection is established, else 0. */
+	switch (evcon->state) {
+	case EVCON_DISCONNECTED:
+	case EVCON_CONNECTING:
+		return (0);
+	case EVCON_IDLE:
+	case EVCON_READING_FIRSTLINE:
+	case EVCON_READING_HEADERS:
+	case EVCON_READING_BODY:
+	case EVCON_READING_TRAILER:
+	case EVCON_WRITING:
+	default:
+		return (1);
+	}
+}
+
+/*
+ * Create the headers needed for an HTTP request
+ */
+static void
+evhttp_make_header_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ const char *method;
+
+ evhttp_remove_header(req->output_headers, "Proxy-Connection");
+
+ /* Generate request line */
+ method = evhttp_method(req->type);
+ evbuffer_add_printf(evcon->output_buffer, "%s %s HTTP/%d.%d\r\n",
+ method, req->uri, req->major, req->minor);
+
+ /* Add the content length on a post request if missing */
+ if (req->type == EVHTTP_REQ_POST &&
+ evhttp_find_header(req->output_headers, "Content-Length") == NULL){
+ char size[12];
+ evutil_snprintf(size, sizeof(size), "%ld",
+ (long)EVBUFFER_LENGTH(req->output_buffer));
+ evhttp_add_header(req->output_headers, "Content-Length", size);
+ }
+}
+
+static int
+evhttp_is_connection_close(int flags, struct evkeyvalq* headers)
+{
+ if (flags & EVHTTP_PROXY_REQUEST) {
+ /* proxy connection */
+ const char *connection = evhttp_find_header(headers, "Proxy-Connection");
+ return (connection == NULL || strcasecmp(connection, "keep-alive") != 0);
+ } else {
+ const char *connection = evhttp_find_header(headers, "Connection");
+ return (connection != NULL && strcasecmp(connection, "close") == 0);
+ }
+}
+
+static int
+evhttp_is_connection_keepalive(struct evkeyvalq* headers)
+{
+ const char *connection = evhttp_find_header(headers, "Connection");
+ return (connection != NULL
+ && strncasecmp(connection, "keep-alive", 10) == 0);
+}
+
+static void
+evhttp_maybe_add_date_header(struct evkeyvalq *headers)
+{
+ if (evhttp_find_header(headers, "Date") == NULL) {
+ char date[50];
+#ifndef WIN32
+ struct tm cur;
+#endif
+ struct tm *cur_p;
+ time_t t = time(NULL);
+#ifdef WIN32
+ cur_p = gmtime(&t);
+#else
+ gmtime_r(&t, &cur);
+ cur_p = &cur;
+#endif
+ if (strftime(date, sizeof(date),
+ "%a, %d %b %Y %H:%M:%S GMT", cur_p) != 0) {
+ evhttp_add_header(headers, "Date", date);
+ }
+ }
+}
+
+static void
+evhttp_maybe_add_content_length_header(struct evkeyvalq *headers,
+ long content_length)
+{
+ if (evhttp_find_header(headers, "Transfer-Encoding") == NULL &&
+ evhttp_find_header(headers, "Content-Length") == NULL) {
+ char len[12];
+ evutil_snprintf(len, sizeof(len), "%ld", content_length);
+ evhttp_add_header(headers, "Content-Length", len);
+ }
+}
+
+/*
+ * Create the headers needed for an HTTP reply
+ */
+
+static void
+evhttp_make_header_response(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ int is_keepalive = evhttp_is_connection_keepalive(req->input_headers);
+ evbuffer_add_printf(evcon->output_buffer, "HTTP/%d.%d %d %s\r\n",
+ req->major, req->minor, req->response_code,
+ req->response_code_line);
+
+ if (req->major == 1) {
+ if (req->minor == 1)
+ evhttp_maybe_add_date_header(req->output_headers);
+
+ /*
+ * if the protocol is 1.0; and the connection was keep-alive
+ * we need to add a keep-alive header, too.
+ */
+ if (req->minor == 0 && is_keepalive)
+ evhttp_add_header(req->output_headers,
+ "Connection", "keep-alive");
+
+ if (req->minor == 1 || is_keepalive) {
+ /*
+ * we need to add the content length if the
+ * user did not give it, this is required for
+ * persistent connections to work.
+ */
+ evhttp_maybe_add_content_length_header(
+ req->output_headers,
+ (long)EVBUFFER_LENGTH(req->output_buffer));
+ }
+ }
+
+ /* Potentially add headers for unidentified content. */
+ if (EVBUFFER_LENGTH(req->output_buffer)) {
+ if (evhttp_find_header(req->output_headers,
+ "Content-Type") == NULL) {
+ evhttp_add_header(req->output_headers,
+ "Content-Type", "text/html; charset=ISO-8859-1");
+ }
+ }
+
+ /* if the request asked for a close, we send a close, too */
+ if (evhttp_is_connection_close(req->flags, req->input_headers)) {
+ evhttp_remove_header(req->output_headers, "Connection");
+ if (!(req->flags & EVHTTP_PROXY_REQUEST))
+ evhttp_add_header(req->output_headers, "Connection", "close");
+ evhttp_remove_header(req->output_headers, "Proxy-Connection");
+ }
+}
+
+void
+evhttp_make_header(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evkeyval *header;
+
+ /*
+ * Depending if this is a HTTP request or response, we might need to
+ * add some new headers or remove existing headers.
+ */
+ if (req->kind == EVHTTP_REQUEST) {
+ evhttp_make_header_request(evcon, req);
+ } else {
+ evhttp_make_header_response(evcon, req);
+ }
+
+ TAILQ_FOREACH(header, req->output_headers, next) {
+ evbuffer_add_printf(evcon->output_buffer, "%s: %s\r\n",
+ header->key, header->value);
+ }
+ evbuffer_add(evcon->output_buffer, "\r\n", 2);
+
+ if (EVBUFFER_LENGTH(req->output_buffer) > 0) {
+ /*
+ * For a request, we add the POST data, for a reply, this
+ * is the regular data.
+ */
+ evbuffer_add_buffer(evcon->output_buffer, req->output_buffer);
+ }
+}
+
+/* Separated host, port and file from URI */
+
+int
+evhttp_hostportfile(char *url, char **phost, u_short *pport, char **pfile)
+{
+	/*
+	 * Splits an "http://host[:port][/file]" URL into host, port and
+	 * file components.  Returns 0 on success, -1 on a malformed URL
+	 * or truncation.  Output pointers refer to static storage that
+	 * is overwritten by the next call.
+	 */
+	/* XXX not threadsafe. */
+	static char host[1024];
+	static char file[1024];
+	char *p;
+	const char *p2;
+	int len;
+	u_short port;
+
+	len = strlen(HTTP_PREFIX);
+	if (strncasecmp(url, HTTP_PREFIX, len))
+		return (-1);
+
+	url += len;
+
+	/* We might overrun */
+	if (strlcpy(host, url, sizeof (host)) >= sizeof(host))
+		return (-1);
+
+	/* Split host[:port] from the path, if any. */
+	p = strchr(host, '/');
+	if (p != NULL) {
+		*p = '\0';
+		p2 = p + 1;
+	} else
+		p2 = NULL;
+
+	if (pfile != NULL) {
+		/* Generate request file */
+		if (p2 == NULL)
+			p2 = "";
+		evutil_snprintf(file, sizeof(file), "/%s", p2);
+	}
+
+	p = strchr(host, ':');
+	if (p != NULL) {
+		*p = '\0';
+		port = atoi(p + 1);
+
+		if (port == 0)
+			return (-1);
+	} else
+		port = HTTP_DEFAULTPORT;
+
+	if (phost != NULL)
+		*phost = host;
+	if (pport != NULL)
+		*pport = port;
+	if (pfile != NULL)
+		*pfile = file;
+
+	return (0);
+}
+
+static int
+evhttp_connection_incoming_fail(struct evhttp_request *req,
+    enum evhttp_connection_error error)
+{
+	/*
+	 * Handles a failure on an incoming (server-side) request.
+	 * Returns -1 when the caller should just free the connection,
+	 * 0 when the request callback has been invoked to send a reply.
+	 */
+	switch (error) {
+	case EVCON_HTTP_TIMEOUT:
+	case EVCON_HTTP_EOF:
+		/*
+		 * these are cases in which we probably should just
+		 * close the connection and not send a reply.  this
+		 * case may happen when a browser keeps a persistent
+		 * connection open and we timeout on the read.
+		 */
+		return (-1);
+	case EVCON_HTTP_INVALID_HEADER:
+	default:	/* xxx: probably should just error on default */
+		/* the callback looks at the uri to determine errors */
+		if (req->uri) {
+			free(req->uri);
+			req->uri = NULL;
+		}
+
+		/*
+		 * the callback needs to send a reply; once the reply has
+		 * been sent, the connection should get freed.
+		 */
+		(*req->cb)(req, req->cb_arg);
+	}
+
+	return (0);
+}
+
+void
+evhttp_connection_fail(struct evhttp_connection *evcon,
+ enum evhttp_connection_error error)
+{
+ struct evhttp_request* req = TAILQ_FIRST(&evcon->requests);
+ void (*cb)(struct evhttp_request *, void *);
+ void *cb_arg;
+ assert(req != NULL);
+
+ if (evcon->flags & EVHTTP_CON_INCOMING) {
+ /*
+ * for incoming requests, there are two different
+ * failure cases. it's either a network level error
+ * or an http layer error. for problems on the network
+ * layer like timeouts we just drop the connections.
+ * For HTTP problems, we might have to send back a
+ * reply before the connection can be freed.
+ */
+ if (evhttp_connection_incoming_fail(req, error) == -1)
+ evhttp_connection_free(evcon);
+ return;
+ }
+
+ /* save the callback for later; the cb might free our object */
+ cb = req->cb;
+ cb_arg = req->cb_arg;
+
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ evhttp_request_free(req);
+
+ /* xxx: maybe we should fail all requests??? */
+
+ /* reset the connection */
+ evhttp_connection_reset(evcon);
+
+ /* We are trying the next request that was queued on us */
+ if (TAILQ_FIRST(&evcon->requests) != NULL)
+ evhttp_connection_connect(evcon);
+
+ /* inform the user */
+ if (cb != NULL)
+ (*cb)(NULL, cb_arg);
+}
+
+void
+evhttp_write(int fd, short what, void *arg)
+{
+	/*
+	 * Write-event callback: flushes evcon->output_buffer to fd and
+	 * re-arms itself until the buffer is empty, then invokes the
+	 * stored completion callback.  Timeouts and write errors fail
+	 * the connection.
+	 */
+	struct evhttp_connection *evcon = arg;
+	int n;
+
+	if (what == EV_TIMEOUT) {
+		evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+		return;
+	}
+
+	n = evbuffer_write(evcon->output_buffer, fd);
+	if (n == -1) {
+		event_debug(("%s: evbuffer_write", __func__));
+		evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+		return;
+	}
+
+	if (n == 0) {
+		event_debug(("%s: write nothing", __func__));
+		evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+		return;
+	}
+
+	/* More data pending: keep the write event armed. */
+	if (EVBUFFER_LENGTH(evcon->output_buffer) != 0) {
+		evhttp_add_event(&evcon->ev,
+		    evcon->timeout, HTTP_WRITE_TIMEOUT);
+		return;
+	}
+
+	/* Activate our call back */
+	if (evcon->cb != NULL)
+		(*evcon->cb)(evcon, evcon->cb_arg);
+}
+
+/**
+ * Advance the connection state.
+ * - If this is an outgoing connection, we've just processed the response;
+ * idle or close the connection.
+ * - If this is an incoming connection, we've just processed the request;
+ * respond.
+ */
+static void
+evhttp_connection_done(struct evhttp_connection *evcon)
+{
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ int con_outgoing = evcon->flags & EVHTTP_CON_OUTGOING;
+
+ if (con_outgoing) {
+ /* idle or close the connection */
+ int need_close;
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ req->evcon = NULL;
+
+ evcon->state = EVCON_IDLE;
+
+ need_close =
+ evhttp_is_connection_close(req->flags, req->input_headers)||
+ evhttp_is_connection_close(req->flags, req->output_headers);
+
+ /* check if we got asked to close the connection */
+ if (need_close)
+ evhttp_connection_reset(evcon);
+
+ if (TAILQ_FIRST(&evcon->requests) != NULL) {
+ /*
+ * We have more requests; reset the connection
+ * and deal with the next request.
+ */
+ if (!evhttp_connected(evcon))
+ evhttp_connection_connect(evcon);
+ else
+ evhttp_request_dispatch(evcon);
+ } else if (!need_close) {
+ /*
+ * The connection is going to be persistent, but we
+ * need to detect if the other side closes it.
+ */
+ evhttp_connection_start_detectclose(evcon);
+ }
+ } else {
+ /*
+ * incoming connection - we need to leave the request on the
+ * connection so that we can reply to it.
+ */
+ evcon->state = EVCON_WRITING;
+ }
+
+ /* notify the user of the request */
+ (*req->cb)(req, req->cb_arg);
+
+ /* if this was an outgoing request, we own and it's done. so free it */
+ if (con_outgoing) {
+ evhttp_request_free(req);
+ }
+}
+
+/*
+ * Handles reading from a chunked request.
+ * return ALL_DATA_READ:
+ * all data has been read
+ * return MORE_DATA_EXPECTED:
+ * more data is expected
+ * return DATA_CORRUPTED:
+ * data is corrupted
+ * return REQUEST_CANCELED:
+ * request was canceled by the user calling evhttp_cancel_request
+ */
+
+static enum message_read_status
+evhttp_handle_chunked_read(struct evhttp_request *req, struct evbuffer *buf)
+{
+ int len;
+
+ while ((len = EVBUFFER_LENGTH(buf)) > 0) {
+ if (req->ntoread < 0) {
+ /* Read chunk size */
+ ev_int64_t ntoread;
+ char *p = evbuffer_readline(buf);
+ char *endp;
+ int error;
+ if (p == NULL)
+ break;
+ /* the last chunk is on a new line? */
+ if (strlen(p) == 0) {
+ free(p);
+ continue;
+ }
+ ntoread = evutil_strtoll(p, &endp, 16);
+ error = (*p == '\0' ||
+ (*endp != '\0' && *endp != ' ') ||
+ ntoread < 0);
+ free(p);
+ if (error) {
+ /* could not get chunk size */
+ return (DATA_CORRUPTED);
+ }
+ req->ntoread = ntoread;
+ if (req->ntoread == 0) {
+ /* Last chunk */
+ return (ALL_DATA_READ);
+ }
+ continue;
+ }
+
+ /* don't have enough to complete a chunk; wait for more */
+ if (len < req->ntoread)
+ return (MORE_DATA_EXPECTED);
+
+ /* Completed chunk */
+ evbuffer_add(req->input_buffer,
+ EVBUFFER_DATA(buf), (size_t)req->ntoread);
+ evbuffer_drain(buf, (size_t)req->ntoread);
+ req->ntoread = -1;
+ if (req->chunk_cb != NULL) {
+ (*req->chunk_cb)(req, req->cb_arg);
+ evbuffer_drain(req->input_buffer,
+ EVBUFFER_LENGTH(req->input_buffer));
+ }
+ }
+
+ return (MORE_DATA_EXPECTED);
+}
+
+static void
+evhttp_read_trailer(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evbuffer *buf = evcon->input_buffer;
+
+ switch (evhttp_parse_headers(req, buf)) {
+ case DATA_CORRUPTED:
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ break;
+ case ALL_DATA_READ:
+ event_del(&evcon->ev);
+ evhttp_connection_done(evcon);
+ break;
+ case MORE_DATA_EXPECTED:
+ default:
+ evhttp_add_event(&evcon->ev, evcon->timeout,
+ HTTP_READ_TIMEOUT);
+ break;
+ }
+}
+
+static void
+evhttp_read_body(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evbuffer *buf = evcon->input_buffer;
+
+ if (req->chunked) {
+ switch (evhttp_handle_chunked_read(req, buf)) {
+ case ALL_DATA_READ:
+ /* finished last chunk */
+ evcon->state = EVCON_READING_TRAILER;
+ evhttp_read_trailer(evcon, req);
+ return;
+ case DATA_CORRUPTED:
+ /* corrupted data */
+ evhttp_connection_fail(evcon,
+ EVCON_HTTP_INVALID_HEADER);
+ return;
+ case REQUEST_CANCELED:
+ /* request canceled */
+ evhttp_request_free(req);
+ return;
+ case MORE_DATA_EXPECTED:
+ default:
+ break;
+ }
+ } else if (req->ntoread < 0) {
+ /* Read until connection close. */
+ evbuffer_add_buffer(req->input_buffer, buf);
+ } else if (EVBUFFER_LENGTH(buf) >= req->ntoread) {
+ /* Completed content length */
+ evbuffer_add(req->input_buffer, EVBUFFER_DATA(buf),
+ (size_t)req->ntoread);
+ evbuffer_drain(buf, (size_t)req->ntoread);
+ req->ntoread = 0;
+ evhttp_connection_done(evcon);
+ return;
+ }
+ /* Read more! */
+ event_set(&evcon->ev, evcon->fd, EV_READ, evhttp_read, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_READ_TIMEOUT);
+}
+
+/*
+ * Reads data into a buffer structure until no more data
+ * can be read on the file descriptor or we have read all
+ * the data that we wanted to read.
+ * Execute callback when done.
+ */
+
+void
+evhttp_read(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ struct evbuffer *buf = evcon->input_buffer;
+ int n, len;
+
+ if (what == EV_TIMEOUT) {
+ evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+ return;
+ }
+ n = evbuffer_read(buf, fd, -1);
+ len = EVBUFFER_LENGTH(buf);
+ event_debug(("%s: got %d on %d\n", __func__, n, fd));
+
+ if (n == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ event_debug(("%s: evbuffer_read", __func__));
+ evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+ } else {
+ evhttp_add_event(&evcon->ev, evcon->timeout,
+ HTTP_READ_TIMEOUT);
+ }
+ return;
+ } else if (n == 0) {
+ /* Connection closed */
+ evhttp_connection_done(evcon);
+ return;
+ }
+
+ switch (evcon->state) {
+ case EVCON_READING_FIRSTLINE:
+ evhttp_read_firstline(evcon, req);
+ break;
+ case EVCON_READING_HEADERS:
+ evhttp_read_header(evcon, req);
+ break;
+ case EVCON_READING_BODY:
+ evhttp_read_body(evcon, req);
+ break;
+ case EVCON_READING_TRAILER:
+ evhttp_read_trailer(evcon, req);
+ break;
+ case EVCON_DISCONNECTED:
+ case EVCON_CONNECTING:
+ case EVCON_IDLE:
+ case EVCON_WRITING:
+ default:
+ event_errx(1, "%s: illegal connection state %d",
+ __func__, evcon->state);
+ }
+}
+
+static void
+evhttp_write_connectioncb(struct evhttp_connection *evcon, void *arg)
+{
+ /* This is after writing the request to the server */
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ assert(req != NULL);
+
+ assert(evcon->state == EVCON_WRITING);
+
+ /* We are done writing our header and are now expecting the response */
+ req->kind = EVHTTP_RESPONSE;
+
+ evhttp_start_read(evcon);
+}
+
+/*
+ * Clean up a connection object
+ */
+
+void
+evhttp_connection_free(struct evhttp_connection *evcon)
+{
+ struct evhttp_request *req;
+
+ /* notify interested parties that this connection is going down */
+ if (evcon->fd != -1) {
+ if (evhttp_connected(evcon) && evcon->closecb != NULL)
+ (*evcon->closecb)(evcon, evcon->closecb_arg);
+ }
+
+ /* remove all requests that might be queued on this connection */
+ while ((req = TAILQ_FIRST(&evcon->requests)) != NULL) {
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ evhttp_request_free(req);
+ }
+
+ if (evcon->http_server != NULL) {
+ struct evhttp *http = evcon->http_server;
+ TAILQ_REMOVE(&http->connections, evcon, next);
+ }
+
+ if (event_initialized(&evcon->close_ev))
+ event_del(&evcon->close_ev);
+
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+
+ if (evcon->fd != -1)
+ EVUTIL_CLOSESOCKET(evcon->fd);
+
+ if (evcon->bind_address != NULL)
+ free(evcon->bind_address);
+
+ if (evcon->address != NULL)
+ free(evcon->address);
+
+ if (evcon->input_buffer != NULL)
+ evbuffer_free(evcon->input_buffer);
+
+ if (evcon->output_buffer != NULL)
+ evbuffer_free(evcon->output_buffer);
+
+ free(evcon);
+}
+
+void
+evhttp_connection_set_local_address(struct evhttp_connection *evcon,
+ const char *address)
+{
+ assert(evcon->state == EVCON_DISCONNECTED);
+ if (evcon->bind_address)
+ free(evcon->bind_address);
+ if ((evcon->bind_address = strdup(address)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+}
+
+void
+evhttp_connection_set_local_port(struct evhttp_connection *evcon,
+ unsigned short port)
+{
+ assert(evcon->state == EVCON_DISCONNECTED);
+ evcon->bind_port = port;
+}
+
+static void
+evhttp_request_dispatch(struct evhttp_connection* evcon)
+{
+	/*
+	 * Starts writing the first queued request on an idle, already
+	 * connected outgoing connection.
+	 */
+	struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+
+	/* this should not usually happen but it's possible */
+	if (req == NULL)
+		return;
+
+	/* delete possible close detection events */
+	evhttp_connection_stop_detectclose(evcon);
+
+	/* we assume that the connection is connected already */
+	assert(evcon->state == EVCON_IDLE);
+
+	evcon->state = EVCON_WRITING;
+
+	/* Create the header from the store arguments */
+	evhttp_make_header(evcon, req);
+
+	evhttp_write_buffer(evcon, evhttp_write_connectioncb, NULL);
+}
+
+/* Reset our connection state */
+void
+evhttp_connection_reset(struct evhttp_connection *evcon)
+{
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+
+ if (evcon->fd != -1) {
+ /* inform interested parties about connection close */
+ if (evhttp_connected(evcon) && evcon->closecb != NULL)
+ (*evcon->closecb)(evcon, evcon->closecb_arg);
+
+ EVUTIL_CLOSESOCKET(evcon->fd);
+ evcon->fd = -1;
+ }
+ evcon->state = EVCON_DISCONNECTED;
+
+ evbuffer_drain(evcon->input_buffer,
+ EVBUFFER_LENGTH(evcon->input_buffer));
+ evbuffer_drain(evcon->output_buffer,
+ EVBUFFER_LENGTH(evcon->output_buffer));
+}
+
+/* Read-readiness on an idle persistent connection means the peer closed;
+ * tear the connection down.  fd/what are unused. */
+static void
+evhttp_detect_close_cb(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ evhttp_connection_reset(evcon);
+}
+
+/* Arm a one-shot EV_READ event so we notice the peer closing an idle
+ * persistent connection between requests. */
+static void
+evhttp_connection_start_detectclose(struct evhttp_connection *evcon)
+{
+ evcon->flags |= EVHTTP_CON_CLOSEDETECT;
+
+ if (event_initialized(&evcon->close_ev))
+ event_del(&evcon->close_ev);
+ event_set(&evcon->close_ev, evcon->fd, EV_READ,
+ evhttp_detect_close_cb, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->close_ev);
+ event_add(&evcon->close_ev, NULL);
+}
+
+/* Disarm close detection (counterpart of _start_detectclose). */
+static void
+evhttp_connection_stop_detectclose(struct evhttp_connection *evcon)
+{
+ evcon->flags &= ~EVHTTP_CON_CLOSEDETECT;
+ event_del(&evcon->close_ev);
+}
+
+/* Timer callback: retry a failed connection attempt.  fd/what unused. */
+static void
+evhttp_connection_retry(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+
+ evcon->state = EVCON_DISCONNECTED;
+ evhttp_connection_connect(evcon);
+}
+
+/*
+ * Call back for asynchronous connection attempt.
+ *
+ * On success the connection goes to EVCON_IDLE and queued requests are
+ * dispatched.  On timeout or connect error we either schedule a retry
+ * with exponential backoff or, once retries are exhausted, fail every
+ * queued request.
+ */
+
+static void
+evhttp_connectioncb(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ int error;
+ socklen_t errsz = sizeof(error);
+
+ if (what == EV_TIMEOUT) {
+ event_debug(("%s: connection timeout for \"%s:%d\" on %d",
+ __func__, evcon->address, evcon->port, evcon->fd));
+ goto cleanup;
+ }
+
+ /* Check if the connection completed; SO_ERROR holds the deferred
+ * result of the non-blocking connect(). */
+ if (getsockopt(evcon->fd, SOL_SOCKET, SO_ERROR, (void*)&error,
+ &errsz) == -1) {
+ event_debug(("%s: getsockopt for \"%s:%d\" on %d",
+ __func__, evcon->address, evcon->port, evcon->fd));
+ goto cleanup;
+ }
+
+ if (error) {
+ event_debug(("%s: connect failed for \"%s:%d\" on %d: %s",
+ __func__, evcon->address, evcon->port, evcon->fd,
+ strerror(error)));
+ goto cleanup;
+ }
+
+ /* We are connected to the server now */
+ event_debug(("%s: connected to \"%s:%d\" on %d\n",
+ __func__, evcon->address, evcon->port, evcon->fd));
+
+ /* Reset the retry count as we were successful in connecting */
+ evcon->retry_cnt = 0;
+ evcon->state = EVCON_IDLE;
+
+ /* try to start requests that have queued up on this connection */
+ evhttp_request_dispatch(evcon);
+ return;
+
+ cleanup:
+ /* retry_max < 0 means retry forever; delay doubles per attempt,
+ * capped at 3600 seconds */
+ if (evcon->retry_max < 0 || evcon->retry_cnt < evcon->retry_max) {
+ evtimer_set(&evcon->ev, evhttp_connection_retry, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, MIN(3600, 2 << evcon->retry_cnt),
+ HTTP_CONNECT_TIMEOUT);
+ evcon->retry_cnt++;
+ return;
+ }
+ evhttp_connection_reset(evcon);
+
+ /* for now, we just signal all requests by executing their callbacks */
+ while (TAILQ_FIRST(&evcon->requests) != NULL) {
+ struct evhttp_request *request = TAILQ_FIRST(&evcon->requests);
+ TAILQ_REMOVE(&evcon->requests, request, next);
+ request->evcon = NULL;
+
+ /* we might want to set an error here */
+ request->cb(request, request->cb_arg);
+ evhttp_request_free(request);
+ }
+}
+
+/*
+ * Check if we got a valid response code.
+ *
+ * NOTE(review): only rejects 0 (i.e. atoi failure); any other integer,
+ * including out-of-range values, is accepted as "valid".
+ */
+
+static int
+evhttp_valid_response_code(int code)
+{
+ if (code == 0)
+ return (0);
+
+ return (1);
+}
+
+/* Parses the status line of a web server, e.g. "HTTP/1.1 200 OK".
+ * Mutates `line` in place via strsep.  Returns 0 on success, -1 on a
+ * malformed line.  NOTE(review): the code is parsed with atoi(), so
+ * non-numeric garbage yields 0 and is then rejected only by the
+ * evhttp_valid_response_code() != 0 test. */
+
+static int
+evhttp_parse_response_line(struct evhttp_request *req, char *line)
+{
+ char *protocol;
+ char *number;
+ char *readable;
+
+ protocol = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ number = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ readable = line;
+
+ if (strcmp(protocol, "HTTP/1.0") == 0) {
+ req->major = 1;
+ req->minor = 0;
+ } else if (strcmp(protocol, "HTTP/1.1") == 0) {
+ req->major = 1;
+ req->minor = 1;
+ } else {
+ event_debug(("%s: bad protocol \"%s\"",
+ __func__, protocol));
+ return (-1);
+ }
+
+ req->response_code = atoi(number);
+ if (!evhttp_valid_response_code(req->response_code)) {
+ event_debug(("%s: bad response code \"%s\"",
+ __func__, number));
+ return (-1);
+ }
+
+ /* out of memory here is fatal by design (event_err exits) */
+ if ((req->response_code_line = strdup(readable)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+ return (0);
+}
+
+/* Parse the first line of a HTTP request, e.g. "GET /index.html HTTP/1.0".
+ * Mutates `line` via strsep; only GET, POST and HEAD are recognized.
+ * Returns 0 on success, -1 on any malformed or unsupported line. */
+
+static int
+evhttp_parse_request_line(struct evhttp_request *req, char *line)
+{
+ char *method;
+ char *uri;
+ char *version;
+
+ /* Parse the request line */
+ method = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ uri = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ version = strsep(&line, " ");
+ /* trailing tokens after the version are a parse error */
+ if (line != NULL)
+ return (-1);
+
+ /* First line */
+ if (strcmp(method, "GET") == 0) {
+ req->type = EVHTTP_REQ_GET;
+ } else if (strcmp(method, "POST") == 0) {
+ req->type = EVHTTP_REQ_POST;
+ } else if (strcmp(method, "HEAD") == 0) {
+ req->type = EVHTTP_REQ_HEAD;
+ } else {
+ event_debug(("%s: bad method %s on request %p from %s",
+ __func__, method, req, req->remote_host));
+ return (-1);
+ }
+
+ if (strcmp(version, "HTTP/1.0") == 0) {
+ req->major = 1;
+ req->minor = 0;
+ } else if (strcmp(version, "HTTP/1.1") == 0) {
+ req->major = 1;
+ req->minor = 1;
+ } else {
+ event_debug(("%s: bad version %s on request %p from %s",
+ __func__, version, req, req->remote_host));
+ return (-1);
+ }
+
+ if ((req->uri = strdup(uri)) == NULL) {
+ event_debug(("%s: evhttp_decode_uri", __func__));
+ return (-1);
+ }
+
+ /* determine if it's a proxy request; absolute URIs do not start
+ * with '/' */
+ if (strlen(req->uri) > 0 && req->uri[0] != '/')
+ req->flags |= EVHTTP_PROXY_REQUEST;
+
+ return (0);
+}
+
+/* Return the value of the first header matching `key` (case-insensitive),
+ * or NULL if absent.  The returned pointer is owned by the header list. */
+const char *
+evhttp_find_header(const struct evkeyvalq *headers, const char *key)
+{
+ struct evkeyval *header;
+
+ TAILQ_FOREACH(header, headers, next) {
+ if (strcasecmp(header->key, key) == 0)
+ return (header->value);
+ }
+
+ return (NULL);
+}
+
+/* Remove and free every header in the list; the list head itself is not
+ * freed and remains a valid empty queue. */
+void
+evhttp_clear_headers(struct evkeyvalq *headers)
+{
+ struct evkeyval *header;
+
+ for (header = TAILQ_FIRST(headers);
+ header != NULL;
+ header = TAILQ_FIRST(headers)) {
+ TAILQ_REMOVE(headers, header, next);
+ free(header->key);
+ free(header->value);
+ free(header);
+ }
+}
+
+/*
+ * Returns 0, if the header was successfully removed.
+ * Returns -1, if the header could not be found.
+ * Only the first case-insensitive match is removed.
+ */
+
+int
+evhttp_remove_header(struct evkeyvalq *headers, const char *key)
+{
+ struct evkeyval *header;
+
+ TAILQ_FOREACH(header, headers, next) {
+ if (strcasecmp(header->key, key) == 0)
+ break;
+ }
+
+ if (header == NULL)
+ return (-1);
+
+ /* Free and remove the header that we found */
+ TAILQ_REMOVE(headers, header, next);
+ free(header->key);
+ free(header->value);
+ free(header);
+
+ return (0);
+}
+
+/* Reject header values containing CR/LF unless each newline run is
+ * followed by a space or tab (legal header folding) — guards against
+ * response-splitting via injected newlines.  Returns 1 if valid. */
+static int
+evhttp_header_is_valid_value(const char *value)
+{
+ const char *p = value;
+
+ while ((p = strpbrk(p, "\r\n")) != NULL) {
+ /* we really expect only one new line */
+ p += strspn(p, "\r\n");
+ /* we expect a space or tab for continuation */
+ if (*p != ' ' && *p != '\t')
+ return (0);
+ }
+ return (1);
+}
+
+/* Validated public entry for adding a header: refuses CR/LF in the key
+ * and non-folding CR/LF in the value, then delegates to the internal
+ * adder.  Returns 0 on success, -1 if dropped or on allocation failure. */
+int
+evhttp_add_header(struct evkeyvalq *headers,
+ const char *key, const char *value)
+{
+ event_debug(("%s: key: %s val: %s\n", __func__, key, value));
+
+ if (strchr(key, '\r') != NULL || strchr(key, '\n') != NULL) {
+ /* drop illegal headers */
+ event_debug(("%s: dropping illegal header key\n", __func__));
+ return (-1);
+ }
+
+ if (!evhttp_header_is_valid_value(value)) {
+ event_debug(("%s: dropping illegal header value\n", __func__));
+ return (-1);
+ }
+
+ return (evhttp_add_header_internal(headers, key, value));
+}
+
+/* Append a copied key/value pair to the header list without validation.
+ * Returns 0 on success, -1 on allocation failure (nothing is added). */
+static int
+evhttp_add_header_internal(struct evkeyvalq *headers,
+ const char *key, const char *value)
+{
+ struct evkeyval *header = calloc(1, sizeof(struct evkeyval));
+ if (header == NULL) {
+ event_warn("%s: calloc", __func__);
+ return (-1);
+ }
+ if ((header->key = strdup(key)) == NULL) {
+ free(header);
+ event_warn("%s: strdup", __func__);
+ return (-1);
+ }
+ if ((header->value = strdup(value)) == NULL) {
+ free(header->key);
+ free(header);
+ event_warn("%s: strdup", __func__);
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(headers, header, next);
+
+ return (0);
+}
+
+/*
+ * Parses the first line of a request or a response out of an event
+ * buffer, dispatching on req->kind.
+ *
+ * Returns
+ * DATA_CORRUPTED on error
+ * MORE_DATA_EXPECTED when we need to read more headers
+ * ALL_DATA_READ when all headers have been read.
+ */
+
+enum message_read_status
+evhttp_parse_firstline(struct evhttp_request *req, struct evbuffer *buffer)
+{
+ char *line;
+ enum message_read_status status = ALL_DATA_READ;
+
+ /* evbuffer_readline returns a malloc'd copy we must free */
+ line = evbuffer_readline(buffer);
+ if (line == NULL)
+ return (MORE_DATA_EXPECTED);
+
+ switch (req->kind) {
+ case EVHTTP_REQUEST:
+ if (evhttp_parse_request_line(req, line) == -1)
+ status = DATA_CORRUPTED;
+ break;
+ case EVHTTP_RESPONSE:
+ if (evhttp_parse_response_line(req, line) == -1)
+ status = DATA_CORRUPTED;
+ break;
+ default:
+ status = DATA_CORRUPTED;
+ }
+
+ free(line);
+ return (status);
+}
+
+/* Handle a folded (continuation) header line by appending it to the
+ * value of the most recently added header.  Returns -1 if there is no
+ * previous header or realloc fails; 0 on success.  NOTE(review): the
+ * continuation is concatenated verbatim, leading whitespace included. */
+static int
+evhttp_append_to_last_header(struct evkeyvalq *headers, const char *line)
+{
+ struct evkeyval *header = TAILQ_LAST(headers, evkeyvalq);
+ char *newval;
+ size_t old_len, line_len;
+
+ if (header == NULL)
+ return (-1);
+
+ old_len = strlen(header->value);
+ line_len = strlen(line);
+
+ newval = realloc(header->value, old_len + line_len + 1);
+ if (newval == NULL)
+ return (-1);
+
+ /* line_len + 1 copies the terminating NUL as well */
+ memcpy(newval + old_len, line, line_len + 1);
+ header->value = newval;
+
+ return (0);
+}
+
+/* Consume header lines from the buffer into req->input_headers until the
+ * blank line terminator.  Returns ALL_DATA_READ on the terminator,
+ * MORE_DATA_EXPECTED when the buffer runs dry mid-headers, and
+ * DATA_CORRUPTED on a malformed line (headers added so far are kept). */
+enum message_read_status
+evhttp_parse_headers(struct evhttp_request *req, struct evbuffer* buffer)
+{
+ char *line;
+ enum message_read_status status = MORE_DATA_EXPECTED;
+
+ struct evkeyvalq* headers = req->input_headers;
+ while ((line = evbuffer_readline(buffer))
+ != NULL) {
+ char *skey, *svalue;
+
+ if (*line == '\0') { /* Last header - Done */
+ status = ALL_DATA_READ;
+ free(line);
+ break;
+ }
+
+ /* Check if this is a continuation line */
+ if (*line == ' ' || *line == '\t') {
+ if (evhttp_append_to_last_header(headers, line) == -1)
+ goto error;
+ free(line);
+ continue;
+ }
+
+ /* Processing of header lines: split at the first ':' */
+ svalue = line;
+ skey = strsep(&svalue, ":");
+ if (svalue == NULL)
+ goto error;
+
+ /* skip optional whitespace after the colon */
+ svalue += strspn(svalue, " ");
+
+ if (evhttp_add_header(headers, skey, svalue) == -1)
+ goto error;
+
+ free(line);
+ }
+
+ return (status);
+
+ error:
+ free(line);
+ return (DATA_CORRUPTED);
+}
+
+/* Work out how many body bytes to expect from Content-Length and
+ * Connection headers.  Sets req->ntoread to -1 for read-until-close,
+ * otherwise to the parsed length.  Returns -1 only for the unreadable
+ * combination "no Content-Length but keep-alive", or a malformed length. */
+static int
+evhttp_get_body_length(struct evhttp_request *req)
+{
+ struct evkeyvalq *headers = req->input_headers;
+ const char *content_length;
+ const char *connection;
+
+ content_length = evhttp_find_header(headers, "Content-Length");
+ connection = evhttp_find_header(headers, "Connection");
+
+ if (content_length == NULL && connection == NULL)
+ req->ntoread = -1;
+ else if (content_length == NULL &&
+ strcasecmp(connection, "Close") != 0) {
+ /* Bad combination, we don't know when it will end */
+ event_warnx("%s: we got no content length, but the "
+ "server wants to keep the connection open: %s.",
+ __func__, connection);
+ return (-1);
+ } else if (content_length == NULL) {
+ req->ntoread = -1;
+ } else {
+ char *endp;
+ /* strtoll-style parse; reject empty, trailing junk, negative */
+ ev_int64_t ntoread = evutil_strtoll(content_length, &endp, 10);
+ if (*content_length == '\0' || *endp != '\0' || ntoread < 0) {
+ event_debug(("%s: illegal content length: %s",
+ __func__, content_length));
+ return (-1);
+ }
+ req->ntoread = ntoread;
+ }
+
+ event_debug(("%s: bytes to read: %lld (in buffer %ld)\n",
+ __func__, req->ntoread,
+ EVBUFFER_LENGTH(req->evcon->input_buffer)));
+
+ return (0);
+}
+
+/* Transition into body reading: decide between chunked and
+ * content-length framing, or finish immediately for bodyless requests
+ * (anything that is not a POST). */
+static void
+evhttp_get_body(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ const char *xfer_enc;
+
+ /* If this is a request without a body, then we are done */
+ if (req->kind == EVHTTP_REQUEST && req->type != EVHTTP_REQ_POST) {
+ evhttp_connection_done(evcon);
+ return;
+ }
+ evcon->state = EVCON_READING_BODY;
+ xfer_enc = evhttp_find_header(req->input_headers, "Transfer-Encoding");
+ if (xfer_enc != NULL && strcasecmp(xfer_enc, "chunked") == 0) {
+ req->chunked = 1;
+ req->ntoread = -1;
+ } else {
+ if (evhttp_get_body_length(req) == -1) {
+ evhttp_connection_fail(evcon,
+ EVCON_HTTP_INVALID_HEADER);
+ return;
+ }
+ }
+ evhttp_read_body(evcon, req);
+}
+
+/* State-machine step: parse the request/status line from the input
+ * buffer; re-arm the read event if incomplete, fail the connection if
+ * corrupt, otherwise move on to header parsing. */
+static void
+evhttp_read_firstline(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ enum message_read_status res;
+
+ res = evhttp_parse_firstline(req, evcon->input_buffer);
+ if (res == DATA_CORRUPTED) {
+ /* Error while reading, terminate */
+ event_debug(("%s: bad header lines on %d\n",
+ __func__, evcon->fd));
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ return;
+ } else if (res == MORE_DATA_EXPECTED) {
+ /* Need more header lines */
+ evhttp_add_event(&evcon->ev,
+ evcon->timeout, HTTP_READ_TIMEOUT);
+ return;
+ }
+
+ evcon->state = EVCON_READING_HEADERS;
+ evhttp_read_header(evcon, req);
+}
+
+/* State-machine step: parse headers; once complete, start reading the
+ * body for requests, and for responses skip the body entirely for
+ * 204/304/1xx status codes as HTTP requires. */
+static void
+evhttp_read_header(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ enum message_read_status res;
+ int fd = evcon->fd;
+
+ res = evhttp_parse_headers(req, evcon->input_buffer);
+ if (res == DATA_CORRUPTED) {
+ /* Error while reading, terminate */
+ event_debug(("%s: bad header lines on %d\n", __func__, fd));
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ return;
+ } else if (res == MORE_DATA_EXPECTED) {
+ /* Need more header lines */
+ evhttp_add_event(&evcon->ev,
+ evcon->timeout, HTTP_READ_TIMEOUT);
+ return;
+ }
+
+ /* Done reading headers, do the real work */
+ switch (req->kind) {
+ case EVHTTP_REQUEST:
+ event_debug(("%s: checking for post data on %d\n",
+ __func__, fd));
+ evhttp_get_body(evcon, req);
+ break;
+
+ case EVHTTP_RESPONSE:
+ if (req->response_code == HTTP_NOCONTENT ||
+ req->response_code == HTTP_NOTMODIFIED ||
+ (req->response_code >= 100 && req->response_code < 200)) {
+ event_debug(("%s: skipping body for code %d\n",
+ __func__, req->response_code));
+ evhttp_connection_done(evcon);
+ } else {
+ event_debug(("%s: start of read body for %s on %d\n",
+ __func__, req->remote_host, fd));
+ evhttp_get_body(evcon, req);
+ }
+ break;
+
+ default:
+ event_warnx("%s: bad header on %d", __func__, fd);
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ break;
+ }
+}
+
+/*
+ * Creates a TCP connection to the specified port and executes a callback
+ * when finished. Failure or success is indicated by the passed connection
+ * object.
+ *
+ * Although this interface accepts a hostname, it is intended to take
+ * only numeric hostnames so that non-blocking DNS resolution can
+ * happen elsewhere.
+ *
+ * Returns NULL on allocation failure; the caller owns the returned
+ * object and frees it with evhttp_connection_free().
+ */
+
+struct evhttp_connection *
+evhttp_connection_new(const char *address, unsigned short port)
+{
+ struct evhttp_connection *evcon = NULL;
+
+ event_debug(("Attempting connection to %s:%d\n", address, port));
+
+ if ((evcon = calloc(1, sizeof(struct evhttp_connection))) == NULL) {
+ event_warn("%s: calloc failed", __func__);
+ goto error;
+ }
+
+ evcon->fd = -1;
+ evcon->port = port;
+
+ /* -1 timeout means "use the library default" */
+ evcon->timeout = -1;
+ evcon->retry_cnt = evcon->retry_max = 0;
+
+ if ((evcon->address = strdup(address)) == NULL) {
+ event_warn("%s: strdup failed", __func__);
+ goto error;
+ }
+
+ if ((evcon->input_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new failed", __func__);
+ goto error;
+ }
+
+ if ((evcon->output_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new failed", __func__);
+ goto error;
+ }
+
+ evcon->state = EVCON_DISCONNECTED;
+ TAILQ_INIT(&evcon->requests);
+
+ return (evcon);
+
+ error:
+ if (evcon != NULL)
+ evhttp_connection_free(evcon);
+ return (NULL);
+}
+
+/* Attach the connection to an event base; may be done only once and only
+ * while disconnected. */
+void evhttp_connection_set_base(struct evhttp_connection *evcon,
+ struct event_base *base)
+{
+ assert(evcon->base == NULL);
+ assert(evcon->state == EVCON_DISCONNECTED);
+ evcon->base = base;
+}
+
+/* Set the per-operation timeout in seconds (-1 restores the default). */
+void
+evhttp_connection_set_timeout(struct evhttp_connection *evcon,
+ int timeout_in_secs)
+{
+ evcon->timeout = timeout_in_secs;
+}
+
+/* Set the maximum connect retries; negative means retry forever. */
+void
+evhttp_connection_set_retries(struct evhttp_connection *evcon,
+ int retry_max)
+{
+ evcon->retry_max = retry_max;
+}
+
+/* Register a callback invoked when the connection is closed. */
+void
+evhttp_connection_set_closecb(struct evhttp_connection *evcon,
+ void (*cb)(struct evhttp_connection *, void *), void *cbarg)
+{
+ evcon->closecb = cb;
+ evcon->closecb_arg = cbarg;
+}
+
+/* Report the peer address and port; *address aliases internal storage
+ * and must not be freed by the caller. */
+void
+evhttp_connection_get_peer(struct evhttp_connection *evcon,
+ char **address, u_short *port)
+{
+ *address = evcon->address;
+ *port = evcon->port;
+}
+
+/* Begin a non-blocking connect to evcon->address:port, optionally binding
+ * to a configured local address/port first.  Completion (or timeout) is
+ * reported asynchronously via evhttp_connectioncb.  Returns 0 if the
+ * attempt was started (or already in progress), -1 on immediate failure. */
+int
+evhttp_connection_connect(struct evhttp_connection *evcon)
+{
+ if (evcon->state == EVCON_CONNECTING)
+ return (0);
+
+ /* drop any previous socket/state before reconnecting */
+ evhttp_connection_reset(evcon);
+
+ assert(!(evcon->flags & EVHTTP_CON_INCOMING));
+ evcon->flags |= EVHTTP_CON_OUTGOING;
+
+ evcon->fd = bind_socket(
+ evcon->bind_address, evcon->bind_port, 0 /*reuse*/);
+ if (evcon->fd == -1) {
+ event_debug(("%s: failed to bind to \"%s\"",
+ __func__, evcon->bind_address));
+ return (-1);
+ }
+
+ if (socket_connect(evcon->fd, evcon->address, evcon->port) == -1) {
+ EVUTIL_CLOSESOCKET(evcon->fd); evcon->fd = -1;
+ return (-1);
+ }
+
+ /* Set up a callback for successful connection setup */
+ event_set(&evcon->ev, evcon->fd, EV_WRITE, evhttp_connectioncb, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_CONNECT_TIMEOUT);
+
+ evcon->state = EVCON_CONNECTING;
+
+ return (0);
+}
+
+/*
+ * Starts an HTTP request on the provided evhttp_connection object.
+ * If the connection object is not connected to the web server already,
+ * this will start the connection.
+ *
+ * The connection takes ownership of `req` (it is queued and dispatched
+ * in FIFO order).  Returns 0 on success, -1 if connecting failed.
+ */
+
+int
+evhttp_make_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req,
+ enum evhttp_cmd_type type, const char *uri)
+{
+ /* We are making a request */
+ req->kind = EVHTTP_REQUEST;
+ req->type = type;
+ if (req->uri != NULL)
+ free(req->uri);
+ /* OOM is fatal here by design (event_err exits) */
+ if ((req->uri = strdup(uri)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+ /* Set the protocol version if it is not supplied */
+ if (!req->major && !req->minor) {
+ req->major = 1;
+ req->minor = 1;
+ }
+
+ assert(req->evcon == NULL);
+ req->evcon = evcon;
+ assert(!(req->flags & EVHTTP_REQ_OWN_CONNECTION));
+
+ TAILQ_INSERT_TAIL(&evcon->requests, req, next);
+
+ /* If the connection object is not connected; make it so */
+ if (!evhttp_connected(evcon))
+ return (evhttp_connection_connect(evcon));
+
+ /*
+ * If it's connected already and we are the first in the queue,
+ * then we can dispatch this request immediately. Otherwise, it
+ * will be dispatched once the pending requests are completed.
+ */
+ if (TAILQ_FIRST(&evcon->requests) == req)
+ evhttp_request_dispatch(evcon);
+
+ return (0);
+}
+
+/*
+ * Reads data from file descriptor into request structure
+ * Request structure needs to be set up correctly.
+ * Arms a one-shot EV_READ event and enters EVCON_READING_FIRSTLINE.
+ */
+
+void
+evhttp_start_read(struct evhttp_connection *evcon)
+{
+ /* Set up an event to read the headers */
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+ event_set(&evcon->ev, evcon->fd, EV_READ, evhttp_read, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_READ_TIMEOUT);
+ evcon->state = EVCON_READING_FIRSTLINE;
+}
+
+/* Server side: the response for the head-of-queue request has been fully
+ * written.  Free the request, then either close the connection (HTTP/1.0
+ * without keep-alive, or explicit Connection: close) or keep it alive
+ * and wait for the next request.  arg is unused. */
+static void
+evhttp_send_done(struct evhttp_connection *evcon, void *arg)
+{
+ int need_close;
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ TAILQ_REMOVE(&evcon->requests, req, next);
+
+ /* delete possible close detection events */
+ evhttp_connection_stop_detectclose(evcon);
+
+ need_close =
+ (req->minor == 0 &&
+ !evhttp_is_connection_keepalive(req->input_headers))||
+ evhttp_is_connection_close(req->flags, req->input_headers) ||
+ evhttp_is_connection_close(req->flags, req->output_headers);
+
+ assert(req->flags & EVHTTP_REQ_OWN_CONNECTION);
+ evhttp_request_free(req);
+
+ if (need_close) {
+ evhttp_connection_free(evcon);
+ return;
+ }
+
+ /* we have a persistent connection; try to accept another request. */
+ if (evhttp_associate_new_request_with_connection(evcon) == -1)
+ evhttp_connection_free(evcon);
+}
+
+/*
+ * Returns an error page and forces the connection closed.
+ *
+ * NOTE(review): evbuffer_new() is not checked for NULL, and the body
+ * text is fixed "Method Not Implemented" boilerplate regardless of the
+ * error code — `reason` only appears in the status line and <TITLE>.
+ */
+
+void
+evhttp_send_error(struct evhttp_request *req, int error, const char *reason)
+{
+#define ERR_FORMAT "<HTML><HEAD>\n" \
+ "<TITLE>%d %s</TITLE>\n" \
+ "</HEAD><BODY>\n" \
+ "<H1>Method Not Implemented</H1>\n" \
+ "Invalid method in request<P>\n" \
+ "</BODY></HTML>\n"
+
+ struct evbuffer *buf = evbuffer_new();
+
+ /* close the connection on error */
+ evhttp_add_header(req->output_headers, "Connection", "close");
+
+ evhttp_response_code(req, error, reason);
+
+ evbuffer_add_printf(buf, ERR_FORMAT, error, reason);
+
+ evhttp_send_page(req, buf);
+
+ evbuffer_free(buf);
+#undef ERR_FORMAT
+}
+
+/* Requires that headers and response code are already set up.
+ * Appends the (optional) body, serializes the headers, and starts the
+ * asynchronous write; evhttp_send_done fires on completion. */
+
+static inline void
+evhttp_send(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ struct evhttp_connection *evcon = req->evcon;
+
+ assert(TAILQ_FIRST(&evcon->requests) == req);
+
+ /* xxx: not sure if we really should expose the data buffer this way */
+ if (databuf != NULL)
+ evbuffer_add_buffer(req->output_buffer, databuf);
+
+ /* Adds headers to the response */
+ evhttp_make_header(evcon, req);
+
+ evhttp_write_buffer(evcon, evhttp_send_done, NULL);
+}
+
+/* Send a complete, non-chunked reply with the given status and body. */
+void
+evhttp_send_reply(struct evhttp_request *req, int code, const char *reason,
+ struct evbuffer *databuf)
+{
+ evhttp_response_code(req, code, reason);
+
+ evhttp_send(req, databuf);
+}
+
+/* Begin a streamed reply: write status and headers now, body later via
+ * evhttp_send_reply_chunk.  HTTP/1.1 peers get chunked transfer coding;
+ * HTTP/1.0 peers get a raw stream terminated by connection close. */
+void
+evhttp_send_reply_start(struct evhttp_request *req, int code,
+ const char *reason)
+{
+ evhttp_response_code(req, code, reason);
+ if (req->major == 1 && req->minor == 1) {
+ /* use chunked encoding for HTTP/1.1 */
+ evhttp_add_header(req->output_headers, "Transfer-Encoding",
+ "chunked");
+ req->chunked = 1;
+ }
+ evhttp_make_header(req->evcon, req);
+ evhttp_write_buffer(req->evcon, NULL, NULL);
+}
+
+/* Stream one body chunk; wraps it in hex-length/CRLF framing when the
+ * reply uses chunked encoding, raw otherwise. */
+void
+evhttp_send_reply_chunk(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ if (req->chunked) {
+ evbuffer_add_printf(req->evcon->output_buffer, "%x\r\n",
+ (unsigned)EVBUFFER_LENGTH(databuf));
+ }
+ evbuffer_add_buffer(req->evcon->output_buffer, databuf);
+ if (req->chunked) {
+ evbuffer_add(req->evcon->output_buffer, "\r\n", 2);
+ }
+ evhttp_write_buffer(req->evcon, NULL, NULL);
+}
+
+/* Finish a streamed reply: emit the zero-length terminal chunk for
+ * chunked replies, or arrange for evhttp_send_done to run once any
+ * pending write completes. */
+void
+evhttp_send_reply_end(struct evhttp_request *req)
+{
+ struct evhttp_connection *evcon = req->evcon;
+
+ if (req->chunked) {
+ evbuffer_add(req->evcon->output_buffer, "0\r\n\r\n", 5);
+ evhttp_write_buffer(req->evcon, evhttp_send_done, NULL);
+ req->chunked = 0;
+ } else if (!event_pending(&evcon->ev, EV_WRITE|EV_TIMEOUT, NULL)) {
+ /* let the connection know that we are done with the request */
+ evhttp_send_done(evcon, NULL);
+ } else {
+ /* make the callback execute after all data has been written */
+ evcon->cb = evhttp_send_done;
+ evcon->cb_arg = NULL;
+ }
+}
+
+/* Mark the request as a response and record code/reason.  NOTE(review):
+ * the strdup result is not checked here, unlike elsewhere in this file. */
+void
+evhttp_response_code(struct evhttp_request *req, int code, const char *reason)
+{
+ req->kind = EVHTTP_RESPONSE;
+ req->response_code = code;
+ if (req->response_code_line != NULL)
+ free(req->response_code_line);
+ req->response_code_line = strdup(reason);
+}
+
+/* Send a single HTML page and close the connection afterwards; existing
+ * output headers are discarded and replaced.  Defaults to HTTP/1.1 and
+ * a 200 status if none were set. */
+void
+evhttp_send_page(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ if (!req->major || !req->minor) {
+ req->major = 1;
+ req->minor = 1;
+ }
+
+ if (req->kind != EVHTTP_RESPONSE)
+ evhttp_response_code(req, 200, "OK");
+
+ evhttp_clear_headers(req->output_headers);
+ evhttp_add_header(req->output_headers, "Content-Type", "text/html");
+ evhttp_add_header(req->output_headers, "Connection", "close");
+
+ evhttp_send(req, databuf);
+}
+
+/* Lookup table for evhttp_encode_uri: a 1 marks a byte emitted verbatim
+ * (alphanumerics plus a small set of punctuation); every other byte,
+ * including all of 128-255, is percent-encoded. */
+static const char uri_chars[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
+ /* 64 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
+ /* 128 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 192 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/*
+ * Helper functions to encode/decode a URI.
+ * The returned string must be freed by the caller.
+ *
+ * NOTE(review): neither evbuffer_new() nor strdup() is checked for
+ * NULL here; an allocation failure would crash.
+ */
+char *
+evhttp_encode_uri(const char *uri)
+{
+ struct evbuffer *buf = evbuffer_new();
+ char *p;
+
+ for (p = (char *)uri; *p != '\0'; p++) {
+ if (uri_chars[(u_char)(*p)]) {
+ evbuffer_add(buf, p, 1);
+ } else {
+ evbuffer_add_printf(buf, "%%%02X", (u_char)(*p));
+ }
+ }
+ /* append the terminating NUL so the buffer holds a C string */
+ evbuffer_add(buf, "", 1);
+ p = strdup((char *)EVBUFFER_DATA(buf));
+ evbuffer_free(buf);
+
+ return (p);
+}
+
+/*
+ * @param always_decode_plus: when true we transform plus to space even
+ * if we have not seen a ?.
+ *
+ * Decodes %XX escapes (and '+' in query position) from `uri` into the
+ * caller-provided buffer `ret`, which must hold strlen(uri)+1 bytes
+ * (decoding never expands).  Returns the decoded length.
+ * NOTE(review): the `length` parameter is accepted but never used; the
+ * loop stops at the NUL terminator instead.
+ */
+static int
+evhttp_decode_uri_internal(
+ const char *uri, size_t length, char *ret, int always_decode_plus)
+{
+ char c;
+ int i, j, in_query = always_decode_plus;
+
+ for (i = j = 0; uri[i] != '\0'; i++) {
+ c = uri[i];
+ if (c == '?') {
+ in_query = 1;
+ } else if (c == '+' && in_query) {
+ c = ' ';
+ } else if (c == '%' && isxdigit((unsigned char)uri[i+1]) &&
+ isxdigit((unsigned char)uri[i+2])) {
+ char tmp[] = { uri[i+1], uri[i+2], '\0' };
+ c = (char)strtol(tmp, NULL, 16);
+ i += 2;
+ }
+ ret[j++] = c;
+ }
+ ret[j] = '\0';
+
+ return (j);
+}
+
+/* Allocate and return a percent-decoded copy of `uri` ('+' is decoded
+ * only after a '?').  Caller frees; OOM is fatal (event_err exits). */
+char *
+evhttp_decode_uri(const char *uri)
+{
+ char *ret;
+
+ if ((ret = malloc(strlen(uri) + 1)) == NULL)
+ event_err(1, "%s: malloc(%lu)", __func__,
+ (unsigned long)(strlen(uri) + 1));
+
+ evhttp_decode_uri_internal(uri, strlen(uri),
+ ret, 0 /*always_decode_plus*/);
+
+ return (ret);
+}
+
+/*
+ * Helper function to parse out arguments in a query.
+ * The arguments are separated by key and value.
+ *
+ * Splits everything after '?' on '&' into key=value pairs; values are
+ * percent-decoded ('+' becomes space), keys are NOT decoded.
+ * NOTE(review): on a malformed pair parsing stops silently, leaving any
+ * pairs already added in `headers`.
+ */
+
+void
+evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
+{
+ char *line;
+ char *argument;
+ char *p;
+
+ TAILQ_INIT(headers);
+
+ /* No arguments - we are done */
+ if (strchr(uri, '?') == NULL)
+ return;
+
+ if ((line = strdup(uri)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+
+ argument = line;
+
+ /* We already know that there has to be a ? */
+ strsep(&argument, "?");
+
+ p = argument;
+ while (p != NULL && *p != '\0') {
+ char *key, *value, *decoded_value;
+ argument = strsep(&p, "&");
+
+ value = argument;
+ key = strsep(&value, "=");
+ if (value == NULL)
+ goto error;
+
+ if ((decoded_value = malloc(strlen(value) + 1)) == NULL)
+ event_err(1, "%s: malloc", __func__);
+
+ evhttp_decode_uri_internal(value, strlen(value),
+ decoded_value, 1 /*always_decode_plus*/);
+ event_debug(("Query Param: %s -> %s\n", key, decoded_value));
+ evhttp_add_header_internal(headers, key, decoded_value);
+ free(decoded_value);
+ }
+
+ error:
+ free(line);
+}
+
+/* Find the registered callback whose path exactly matches the request
+ * URI (ignoring any "?query" suffix).  Returns NULL if none matches. */
+static struct evhttp_cb *
+evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req)
+{
+ struct evhttp_cb *cb;
+ size_t offset = 0;
+
+ /* Test for different URLs */
+ char *p = strchr(req->uri, '?');
+ if (p != NULL)
+ offset = (size_t)(p - req->uri);
+
+ TAILQ_FOREACH(cb, callbacks, next) {
+ int res = 0;
+ if (p == NULL) {
+ res = strcmp(cb->what, req->uri) == 0;
+ } else {
+ /* compare only up to the '?', and require cb->what
+ * to end there too */
+ res = ((strncmp(cb->what, req->uri, offset) == 0) &&
+ (cb->what[offset] == '\0'));
+ }
+
+ if (res)
+ return (cb);
+ }
+
+ return (NULL);
+}
+
+/* Top-level request router: try a URI-specific callback, then the
+ * generic callback, and finally fall back to an HTML-escaped 404 page.
+ * NOTE(review): evbuffer_new() in the 404 path is unchecked. */
+static void
+evhttp_handle_request(struct evhttp_request *req, void *arg)
+{
+ struct evhttp *http = arg;
+ struct evhttp_cb *cb = NULL;
+
+ if (req->uri == NULL) {
+ evhttp_send_error(req, HTTP_BADREQUEST, "Bad Request");
+ return;
+ }
+
+ if ((cb = evhttp_dispatch_callback(&http->callbacks, req)) != NULL) {
+ (*cb->cb)(req, cb->cbarg);
+ return;
+ }
+
+ /* Generic call back */
+ if (http->gencb) {
+ (*http->gencb)(req, http->gencbarg);
+ return;
+ } else {
+ /* We need to send a 404 here */
+#define ERR_FORMAT "<html><head>" \
+ "<title>404 Not Found</title>" \
+ "</head><body>" \
+ "<h1>Not Found</h1>" \
+ "<p>The requested URL %s was not found on this server.</p>"\
+ "</body></html>\n"
+
+ /* escape the URI before echoing it back to avoid HTML injection */
+ char *escaped_html = evhttp_htmlescape(req->uri);
+ struct evbuffer *buf = evbuffer_new();
+
+ evhttp_response_code(req, HTTP_NOTFOUND, "Not Found");
+
+ evbuffer_add_printf(buf, ERR_FORMAT, escaped_html);
+
+ free(escaped_html);
+
+ evhttp_send_page(req, buf);
+
+ evbuffer_free(buf);
+#undef ERR_FORMAT
+ }
+}
+
+/* Listener callback: accept one client and hand it to the HTTP server.
+ * NOTE(review): if evutil_make_socket_nonblocking() fails, `nfd` is
+ * leaked — it is never closed before the early return. */
+static void
+accept_socket(int fd, short what, void *arg)
+{
+ struct evhttp *http = arg;
+ struct sockaddr_storage ss;
+ socklen_t addrlen = sizeof(ss);
+ int nfd;
+
+ if ((nfd = accept(fd, (struct sockaddr *)&ss, &addrlen)) == -1) {
+ if (errno != EAGAIN && errno != EINTR)
+ event_warn("%s: bad accept", __func__);
+ return;
+ }
+ if (evutil_make_socket_nonblocking(nfd) < 0)
+ return;
+
+ evhttp_get_request(http, nfd, (struct sockaddr *)&ss, addrlen);
+}
+
+/* Bind a listening socket on address:port (with SO_REUSEADDR), listen
+ * with a backlog of 128, and register it with the server.  Returns 0 on
+ * success, -1 on failure (the socket is closed on the listen path). */
+int
+evhttp_bind_socket(struct evhttp *http, const char *address, u_short port)
+{
+ int fd;
+ int res;
+
+ if ((fd = bind_socket(address, port, 1 /*reuse*/)) == -1)
+ return (-1);
+
+ if (listen(fd, 128) == -1) {
+ event_warn("%s: listen", __func__);
+ EVUTIL_CLOSESOCKET(fd);
+ return (-1);
+ }
+
+ res = evhttp_accept_socket(http, fd);
+
+ if (res != -1)
+ event_debug(("Bound to port %d - Awaiting connections ... ",
+ port));
+
+ return (res);
+}
+
+/* Register an already-listening fd with the server: wrap it in a bound-
+ * socket record and schedule a persistent EV_READ accept event.
+ * Returns 0 on success, -1 on allocation or event_add failure (the fd
+ * itself is left untouched for the caller). */
+int
+evhttp_accept_socket(struct evhttp *http, int fd)
+{
+ struct evhttp_bound_socket *bound;
+ struct event *ev;
+ int res;
+
+ bound = malloc(sizeof(struct evhttp_bound_socket));
+ if (bound == NULL)
+ return (-1);
+
+ ev = &bound->bind_ev;
+
+ /* Schedule the socket for accepting */
+ event_set(ev, fd, EV_READ | EV_PERSIST, accept_socket, http);
+ EVHTTP_BASE_SET(http, ev);
+
+ res = event_add(ev, NULL);
+
+ if (res == -1) {
+ free(bound);
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(&http->sockets, bound, next);
+
+ return (0);
+}
+
+/* Allocate and zero-initialize a bare evhttp server object.
+ * Returns NULL on allocation failure. */
+static struct evhttp*
+evhttp_new_object(void)
+{
+ struct evhttp *http = NULL;
+
+ if ((http = calloc(1, sizeof(struct evhttp))) == NULL) {
+ event_warn("%s: calloc", __func__);
+ return (NULL);
+ }
+
+ /* -1 means "library default timeout" */
+ http->timeout = -1;
+
+ TAILQ_INIT(&http->sockets);
+ TAILQ_INIT(&http->callbacks);
+ TAILQ_INIT(&http->connections);
+
+ return (http);
+}
+
+/* Create a server bound to the given event base.
+ * NOTE(review): evhttp_new_object() can return NULL, and the result is
+ * dereferenced without a check — crash on out-of-memory. */
+struct evhttp *
+evhttp_new(struct event_base *base)
+{
+ struct evhttp *http = evhttp_new_object();
+
+ http->base = base;
+
+ return (http);
+}
+
+/*
+ * Start a web server on the specified address and port.
+ *
+ * NOTE(review): evhttp_new_object() can return NULL, which would be
+ * passed unchecked into evhttp_bind_socket().
+ */
+
+struct evhttp *
+evhttp_start(const char *address, u_short port)
+{
+ struct evhttp *http = evhttp_new_object();
+
+ if (evhttp_bind_socket(http, address, port) == -1) {
+ /* nothing else was allocated yet, so plain free suffices */
+ free(http);
+ return (NULL);
+ }
+
+ return (http);
+}
+
+/* Tear down a server: close and free all listening sockets, free every
+ * live connection, then every registered callback, then the object. */
+void
+evhttp_free(struct evhttp* http)
+{
+ struct evhttp_cb *http_cb;
+ struct evhttp_connection *evcon;
+ struct evhttp_bound_socket *bound;
+ int fd;
+
+ /* Remove the accepting part */
+ while ((bound = TAILQ_FIRST(&http->sockets)) != NULL) {
+ TAILQ_REMOVE(&http->sockets, bound, next);
+
+ fd = bound->bind_ev.ev_fd;
+ event_del(&bound->bind_ev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ free(bound);
+ }
+
+ while ((evcon = TAILQ_FIRST(&http->connections)) != NULL) {
+ /* evhttp_connection_free removes the connection */
+ evhttp_connection_free(evcon);
+ }
+
+ while ((http_cb = TAILQ_FIRST(&http->callbacks)) != NULL) {
+ TAILQ_REMOVE(&http->callbacks, http_cb, next);
+ free(http_cb->what);
+ free(http_cb);
+ }
+
+ free(http);
+}
+
+/* Set the server-wide I/O timeout in seconds (-1 restores the default). */
+void
+evhttp_set_timeout(struct evhttp* http, int timeout_in_secs)
+{
+ http->timeout = timeout_in_secs;
+}
+
+/* Register a handler for an exact URI path.  OOM on the record is fatal;
+ * NOTE(review): the strdup of `uri` itself is not checked. */
+void
+evhttp_set_cb(struct evhttp *http, const char *uri,
+ void (*cb)(struct evhttp_request *, void *), void *cbarg)
+{
+ struct evhttp_cb *http_cb;
+
+ if ((http_cb = calloc(1, sizeof(struct evhttp_cb))) == NULL)
+ event_err(1, "%s: calloc", __func__);
+
+ http_cb->what = strdup(uri);
+ http_cb->cb = cb;
+ http_cb->cbarg = cbarg;
+
+ TAILQ_INSERT_TAIL(&http->callbacks, http_cb, next);
+}
+
+/* Unregister the handler for `uri`.  Returns 0 if found and removed,
+ * -1 if no handler matched. */
+int
+evhttp_del_cb(struct evhttp *http, const char *uri)
+{
+ struct evhttp_cb *http_cb;
+
+ TAILQ_FOREACH(http_cb, &http->callbacks, next) {
+ if (strcmp(http_cb->what, uri) == 0)
+ break;
+ }
+ if (http_cb == NULL)
+ return (-1);
+
+ TAILQ_REMOVE(&http->callbacks, http_cb, next);
+ free(http_cb->what);
+ free(http_cb);
+
+ return (0);
+}
+
+/* Register the catch-all handler used when no URI-specific one matches. */
+void
+evhttp_set_gencb(struct evhttp *http,
+ void (*cb)(struct evhttp_request *, void *), void *cbarg)
+{
+ http->gencb = cb;
+ http->gencbarg = cbarg;
+}
+
+/*
+ * Request related functions
+ */
+
+/* Allocate a request object with empty header lists and buffers; `cb`
+ * fires when the request completes.  Returns NULL on allocation failure
+ * (partial state is released via evhttp_request_free). */
+struct evhttp_request *
+evhttp_request_new(void (*cb)(struct evhttp_request *, void *), void *arg)
+{
+ struct evhttp_request *req = NULL;
+
+ /* Allocate request structure */
+ if ((req = calloc(1, sizeof(struct evhttp_request))) == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+
+ req->kind = EVHTTP_RESPONSE;
+ req->input_headers = calloc(1, sizeof(struct evkeyvalq));
+ if (req->input_headers == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+ TAILQ_INIT(req->input_headers);
+
+ req->output_headers = calloc(1, sizeof(struct evkeyvalq));
+ if (req->output_headers == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+ TAILQ_INIT(req->output_headers);
+
+ if ((req->input_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new", __func__);
+ goto error;
+ }
+
+ if ((req->output_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new", __func__);
+ goto error;
+ }
+
+ req->cb = cb;
+ req->cb_arg = arg;
+
+ return (req);
+
+ error:
+ if (req != NULL)
+ evhttp_request_free(req);
+ return (NULL);
+}
+
+/*
+ * Release a request object and everything it owns. Must tolerate a
+ * partially constructed request: evhttp_request_new's error path calls
+ * this while input_headers/output_headers/buffers may still be NULL,
+ * so the header queues are guarded before being walked (the original
+ * passed a NULL queue to evhttp_clear_headers, which dereferences it).
+ * free(NULL) is a no-op, so plain string members need no guard.
+ */
+void
+evhttp_request_free(struct evhttp_request *req)
+{
+ free(req->remote_host);
+ free(req->uri);
+ free(req->response_code_line);
+
+ if (req->input_headers != NULL) {
+ evhttp_clear_headers(req->input_headers);
+ free(req->input_headers);
+ }
+
+ if (req->output_headers != NULL) {
+ evhttp_clear_headers(req->output_headers);
+ free(req->output_headers);
+ }
+
+ if (req->input_buffer != NULL)
+ evbuffer_free(req->input_buffer);
+
+ if (req->output_buffer != NULL)
+ evbuffer_free(req->output_buffer);
+
+ free(req);
+}
+
+/* Register a callback fired as each chunk of a chunked response arrives. */
+void
+evhttp_request_set_chunked_cb(struct evhttp_request *req,
+ void (*cb)(struct evhttp_request *, void *))
+{
+ req->chunk_cb = cb;
+}
+
+/*
+ * Allows for inspection of the request URI
+ */
+
+/* Return the request's URI string, or NULL (with a debug log) if the
+ * request line has not been parsed yet. The string remains owned by
+ * the request. */
+const char *
+evhttp_request_uri(struct evhttp_request *req) {
+ if (req->uri == NULL)
+ event_debug(("%s: request %p has no uri\n", __func__, req));
+ return (req->uri);
+}
+
+/*
+ * Takes a file descriptor to read a request from.
+ * The callback is executed once the whole request has been read.
+ */
+
+/*
+ * Wrap an accepted socket in a connection object for the server side.
+ * Returns NULL if the peer address cannot be converted to text or the
+ * connection object cannot be created; the caller keeps ownership of
+ * 'fd' in that case.
+ */
+static struct evhttp_connection*
+evhttp_get_request_connection(
+ struct evhttp* http,
+ int fd, struct sockaddr *sa, socklen_t salen)
+{
+ struct evhttp_connection *evcon;
+ char *hostname = NULL, *portname = NULL;
+
+ /* name_from_addr leaves the out-pointers untouched on failure,
+ * hence the NULL pre-initialization above. */
+ name_from_addr(sa, salen, &hostname, &portname);
+ if (hostname == NULL || portname == NULL) {
+ if (hostname) free(hostname);
+ if (portname) free(portname);
+ return (NULL);
+ }
+
+ event_debug(("%s: new request from %s:%s on %d\n",
+ __func__, hostname, portname, fd));
+
+ /* we need a connection object to put the http request on */
+ evcon = evhttp_connection_new(hostname, atoi(portname));
+ free(hostname);
+ free(portname);
+ if (evcon == NULL)
+ return (NULL);
+
+ /* associate the base if we have one*/
+ evhttp_connection_set_base(evcon, http->base);
+
+ /* Mark as server-side and skip the connect phase: the socket is
+ * already established, so start at the request-line state. */
+ evcon->flags |= EVHTTP_CON_INCOMING;
+ evcon->state = EVCON_READING_FIRSTLINE;
+
+ evcon->fd = fd;
+
+ return (evcon);
+}
+
+/*
+ * Create a fresh server-side request on 'evcon' and start reading it.
+ * Returns 0 on success, -1 if the request object cannot be allocated.
+ * Note: strdup failure is fatal here (event_err exits the process).
+ */
+static int
+evhttp_associate_new_request_with_connection(struct evhttp_connection *evcon)
+{
+ struct evhttp *http = evcon->http_server;
+ struct evhttp_request *req;
+ if ((req = evhttp_request_new(evhttp_handle_request, http)) == NULL)
+ return (-1);
+
+ req->evcon = evcon; /* the request ends up owning the connection */
+ req->flags |= EVHTTP_REQ_OWN_CONNECTION;
+
+ TAILQ_INSERT_TAIL(&evcon->requests, req, next);
+
+ /* Incoming, so flip the default EVHTTP_RESPONSE kind. */
+ req->kind = EVHTTP_REQUEST;
+
+ if ((req->remote_host = strdup(evcon->address)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+ req->remote_port = evcon->port;
+
+ evhttp_start_read(evcon);
+
+ return (0);
+}
+
+/*
+ * Entry point for a newly accepted socket: build a connection, apply
+ * the server's idle timeout, link it into the server's connection list
+ * and kick off reading the first request. On failure the connection
+ * (and with it the fd) is freed.
+ */
+void
+evhttp_get_request(struct evhttp *http, int fd,
+ struct sockaddr *sa, socklen_t salen)
+{
+ struct evhttp_connection *evcon;
+
+ evcon = evhttp_get_request_connection(http, fd, sa, salen);
+ if (evcon == NULL)
+ return;
+
+ /* the timeout can be used by the server to close idle connections */
+ if (http->timeout != -1)
+ evhttp_connection_set_timeout(evcon, http->timeout);
+
+ /*
+ * if we want to accept more than one request on a connection,
+ * we need to know which http server it belongs to.
+ */
+ evcon->http_server = http;
+ TAILQ_INSERT_TAIL(&http->connections, evcon, next);
+
+ if (evhttp_associate_new_request_with_connection(evcon) == -1)
+ evhttp_connection_free(evcon);
+}
+
+
+/*
+ * Network helper functions that we do not want to export to the rest of
+ * the world.
+ */
+#if 0 /* Unused */
+/* Resolve a host name to an addrinfo list. Compiled out: nothing in
+ * this file calls it; kept for reference only. Caller would own the
+ * returned list (freeaddrinfo). */
+static struct addrinfo *
+addr_from_name(char *address)
+{
+#ifdef HAVE_GETADDRINFO
+ struct addrinfo ai, *aitop;
+ int ai_result;
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = AF_INET;
+ ai.ai_socktype = SOCK_RAW;
+ ai.ai_flags = 0;
+ if ((ai_result = getaddrinfo(address, NULL, &ai, &aitop)) != 0) {
+ /* EAI_SYSTEM means the real error is in errno. */
+ if ( ai_result == EAI_SYSTEM )
+ event_warn("getaddrinfo");
+ else
+ event_warnx("getaddrinfo: %s", gai_strerror(ai_result));
+ }
+
+ return (aitop);
+#else
+ assert(0);
+ return NULL; /* XXXXX Use gethostbyname, if this function is ever used. */
+#endif
+}
+#endif
+
+/*
+ * Convert a socket address to numeric host and port strings. On
+ * success *phost and *pport receive strdup'ed strings owned by the
+ * caller; on lookup failure the out-pointers are left untouched, so
+ * callers must pre-initialize them to NULL to detect it.
+ * NOTE(review): the strdup results are not checked, so on OOM an
+ * out-pointer may be set to NULL as well — callers appear to treat
+ * that the same as failure.
+ */
+static void
+name_from_addr(struct sockaddr *sa, socklen_t salen,
+ char **phost, char **pport)
+{
+ char ntop[NI_MAXHOST];
+ char strport[NI_MAXSERV];
+ int ni_result;
+
+#ifdef HAVE_GETNAMEINFO
+ ni_result = getnameinfo(sa, salen,
+ ntop, sizeof(ntop), strport, sizeof(strport),
+ NI_NUMERICHOST|NI_NUMERICSERV);
+
+ if (ni_result != 0) {
+ /* EAI_SYSTEM: real cause is in errno; event_err exits. */
+ if (ni_result == EAI_SYSTEM)
+ event_err(1, "getnameinfo failed");
+ else
+ event_errx(1, "getnameinfo failed: %s", gai_strerror(ni_result));
+ return;
+ }
+#else
+ ni_result = fake_getnameinfo(sa, salen,
+ ntop, sizeof(ntop), strport, sizeof(strport),
+ NI_NUMERICHOST|NI_NUMERICSERV);
+ if (ni_result != 0)
+ return;
+#endif
+ *phost = strdup(ntop);
+ *pport = strdup(strport);
+}
+
+/* Create a non-blocking socket and bind it */
+/* todo: rename this function */
+static int
+bind_socket_ai(struct addrinfo *ai, int reuse)
+{
+ int fd, on = 1, r;
+ int serrno;
+
+ /* Create listen socket */
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd == -1) {
+ event_warn("socket");
+ return (-1);
+ }
+
+ if (evutil_make_socket_nonblocking(fd) < 0)
+ goto out;
+
+#ifndef WIN32
+ if (fcntl(fd, F_SETFD, 1) == -1) {
+ event_warn("fcntl(F_SETFD)");
+ goto out;
+ }
+#endif
+
+ setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, sizeof(on));
+ if (reuse) {
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ (void *)&on, sizeof(on));
+ }
+
+ if (ai != NULL) {
+ r = bind(fd, ai->ai_addr, ai->ai_addrlen);
+ if (r == -1)
+ goto out;
+ }
+
+ return (fd);
+
+ out:
+ serrno = EVUTIL_SOCKET_ERROR();
+ EVUTIL_CLOSESOCKET(fd);
+ EVUTIL_SET_SOCKET_ERROR(serrno);
+ return (-1);
+}
+
+/*
+ * Build an addrinfo for address:port suitable for bind/connect.
+ * With getaddrinfo the caller must free the result with freeaddrinfo;
+ * without it, the result points into static storage (see below) and is
+ * released via fake_freeaddrinfo. Returns NULL on failure.
+ */
+static struct addrinfo *
+make_addrinfo(const char *address, u_short port)
+{
+ struct addrinfo *aitop = NULL;
+
+#ifdef HAVE_GETADDRINFO
+ struct addrinfo ai;
+ char strport[NI_MAXSERV];
+ int ai_result;
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = AF_INET;
+ ai.ai_socktype = SOCK_STREAM;
+ ai.ai_flags = AI_PASSIVE; /* turn NULL host name into INADDR_ANY */
+ evutil_snprintf(strport, sizeof(strport), "%d", port);
+ if ((ai_result = getaddrinfo(address, strport, &ai, &aitop)) != 0) {
+ if ( ai_result == EAI_SYSTEM )
+ event_warn("getaddrinfo");
+ else
+ event_warnx("getaddrinfo: %s", gai_strerror(ai_result));
+ return (NULL);
+ }
+#else
+ /* Not thread-safe: rotates between two static slots so that two
+ * outstanding results can coexist (e.g. bind + connect). */
+ static int cur;
+ static struct addrinfo ai[2]; /* We will be returning the address of some of this memory so it has to last even after this call. */
+ if (++cur == 2) cur = 0; /* allow calling this function twice */
+
+ if (fake_getaddrinfo(address, &ai[cur]) < 0) {
+ event_warn("fake_getaddrinfo");
+ return (NULL);
+ }
+ aitop = &ai[cur];
+ ((struct sockaddr_in *) aitop->ai_addr)->sin_port = htons(port);
+#endif
+
+ return (aitop);
+}
+
+/*
+ * Create a non-blocking socket bound to address:port, or an unbound
+ * socket when both are unset. Returns the fd or -1 on failure.
+ */
+static int
+bind_socket(const char *address, u_short port, int reuse)
+{
+ int fd;
+ struct addrinfo *aitop = NULL;
+
+ /* just create an unbound socket */
+ if (address == NULL && port == 0)
+ return bind_socket_ai(NULL, 0);
+
+ aitop = make_addrinfo(address, port);
+
+ if (aitop == NULL)
+ return (-1);
+
+ fd = bind_socket_ai(aitop, reuse);
+
+ /* Release with the matching allocator for this build. */
+#ifdef HAVE_GETADDRINFO
+ freeaddrinfo(aitop);
+#else
+ fake_freeaddrinfo(aitop);
+#endif
+
+ return (fd);
+}
+
+/*
+ * Start a (typically non-blocking) connect of 'fd' to address:port.
+ * Returns 0 when the connect succeeded or is in progress, -1 on error.
+ */
+static int
+socket_connect(int fd, const char *address, unsigned short port)
+{
+ struct addrinfo *ai = make_addrinfo(address, port);
+ int res = -1;
+
+ if (ai == NULL) {
+ event_debug(("%s: make_addrinfo: \"%s:%d\"",
+ __func__, address, port));
+ return (-1);
+ }
+
+ if (connect(fd, ai->ai_addr, ai->ai_addrlen) == -1) {
+ /* A non-blocking connect reports "in progress"; only other
+ * errors are real failures. */
+#ifdef WIN32
+ int tmp_error = WSAGetLastError();
+ if (tmp_error != WSAEWOULDBLOCK && tmp_error != WSAEINVAL &&
+ tmp_error != WSAEINPROGRESS) {
+ goto out;
+ }
+#else
+ if (errno != EINPROGRESS) {
+ goto out;
+ }
+#endif
+ }
+
+ /* everything is fine */
+ res = 0;
+
+out:
+#ifdef HAVE_GETADDRINFO
+ freeaddrinfo(ai);
+#else
+ fake_freeaddrinfo(ai);
+#endif
+
+ return (res);
+}
diff --git a/libevent/kqueue.c b/libevent/kqueue.c
new file mode 100644
index 00000000000..36eebe5fc6e
--- /dev/null
+++ b/libevent/kqueue.c
@@ -0,0 +1,449 @@
+/* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/event.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+/* Some platforms apparently define the udata field of struct kevent as
+ * intptr_t, whereas others define it as void*. There doesn't seem to be an
+ * easy way to tell them apart via autoconf, so we need to use OS macros. */
+#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
+#define PTR_TO_UDATA(x) ((intptr_t)(x))
+#else
+#define PTR_TO_UDATA(x) (x)
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "log.h"
+#include "event-internal.h"
+
+#define EVLIST_X_KQINKERNEL 0x1000
+
+#define NEVENT 64
+
+struct kqop {
+ struct kevent *changes;
+ int nchanges;
+ struct kevent *events;
+ struct event_list evsigevents[NSIG];
+ int nevents;
+ int kq;
+ pid_t pid;
+};
+
+static void *kq_init (struct event_base *);
+static int kq_add (void *, struct event *);
+static int kq_del (void *, struct event *);
+static int kq_dispatch (struct event_base *, void *, struct timeval *);
+static int kq_insert (struct kqop *, struct kevent *);
+static void kq_dealloc (struct event_base *, void *);
+
+const struct eventop kqops = {
+ "kqueue",
+ kq_init,
+ kq_add,
+ kq_del,
+ kq_dispatch,
+ kq_dealloc,
+ 1 /* need reinit */
+};
+
+/*
+ * Allocate and initialize the kqueue backend state. Returns NULL (so
+ * event_base falls back to another backend) if kqueue is disabled via
+ * the environment, allocation fails, or the kernel's kqueue is broken.
+ */
+static void *
+kq_init(struct event_base *base)
+{
+ int i, kq;
+ struct kqop *kqueueop;
+
+ /* Disable kqueue when this environment variable is set */
+ if (getenv("EVENT_NOKQUEUE"))
+ return (NULL);
+
+ if (!(kqueueop = calloc(1, sizeof(struct kqop))))
+ return (NULL);
+
+ /* Initalize the kernel queue */
+
+ if ((kq = kqueue()) == -1) {
+ event_warn("kqueue");
+ free (kqueueop);
+ return (NULL);
+ }
+
+ kqueueop->kq = kq;
+
+ /* Remember the owning process: kqueue fds don't survive fork(),
+ * so kq_dealloc only closes it in the creating process. */
+ kqueueop->pid = getpid();
+
+ /* Initalize fields */
+ kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
+ if (kqueueop->changes == NULL) {
+ free (kqueueop);
+ return (NULL);
+ }
+ kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
+ if (kqueueop->events == NULL) {
+ free (kqueueop->changes);
+ free (kqueueop);
+ return (NULL);
+ }
+ kqueueop->nevents = NEVENT;
+
+ /* we need to keep track of multiple events per signal */
+ for (i = 0; i < NSIG; ++i) {
+ TAILQ_INIT(&kqueueop->evsigevents[i]);
+ }
+
+ /* Check for Mac OS X kqueue bug. */
+ /* Probe with a bogus ident: a working kqueue reports the error in
+ * events[0] (EV_ERROR) rather than failing the kevent() call. */
+ kqueueop->changes[0].ident = -1;
+ kqueueop->changes[0].filter = EVFILT_READ;
+ kqueueop->changes[0].flags = EV_ADD;
+ /*
+ * If kqueue works, then kevent will succeed, and it will
+ * stick an error in events[0]. If kqueue is broken, then
+ * kevent will fail.
+ */
+ if (kevent(kq,
+ kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
+ kqueueop->events[0].ident != -1 ||
+ kqueueop->events[0].flags != EV_ERROR) {
+ event_warn("%s: detected broken kqueue; not using.", __func__);
+ free(kqueueop->changes);
+ free(kqueueop->events);
+ free(kqueueop);
+ close(kq);
+ return (NULL);
+ }
+
+ return (kqueueop);
+}
+
+/*
+ * Queue a change record for the next kevent() call, growing both the
+ * change and result arrays (doubling) when full. Returns 0 on success,
+ * -1 on allocation failure (the existing arrays stay valid).
+ */
+static int
+kq_insert(struct kqop *kqop, struct kevent *kev)
+{
+ int nevents = kqop->nevents;
+
+ if (kqop->nchanges == nevents) {
+ struct kevent *newchange;
+ struct kevent *newresult;
+
+ nevents *= 2;
+
+ /* realloc into a temporary so failure can't clobber the
+ * still-valid original pointer. */
+ newchange = realloc(kqop->changes,
+ nevents * sizeof(struct kevent));
+ if (newchange == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (-1);
+ }
+ kqop->changes = newchange;
+
+ newresult = realloc(kqop->events,
+ nevents * sizeof(struct kevent));
+
+ /*
+ * If we fail, we don't have to worry about freeing,
+ * the next realloc will pick it up.
+ */
+ if (newresult == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (-1);
+ }
+ kqop->events = newresult;
+
+ kqop->nevents = nevents;
+ }
+
+ memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
+
+ event_debug(("%s: fd %d %s%s",
+ __func__, (int)kev->ident,
+ kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
+ kev->flags == EV_DELETE ? " (del)" : ""));
+
+ return (0);
+}
+
+/* No-op handler: its only job is to interrupt kevent() so the signal
+ * is delivered through the EVFILT_SIGNAL path instead. */
+static void
+kq_sighandler(int sig)
+{
+ /* Do nothing here */
+}
+
+/*
+ * Submit all queued changes and wait (up to 'tv', or forever if NULL)
+ * for events, then activate the corresponding struct events.
+ * Returns 0 on success or benign interruption (EINTR), -1 on error.
+ */
+static int
+kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct kqop *kqop = arg;
+ struct kevent *changes = kqop->changes;
+ struct kevent *events = kqop->events;
+ struct event *ev;
+ struct timespec ts, *ts_p = NULL;
+ int i, res;
+
+ if (tv != NULL) {
+ TIMEVAL_TO_TIMESPEC(tv, &ts);
+ ts_p = &ts;
+ }
+
+ /* Flush pending changes and collect ready events in one call. */
+ res = kevent(kqop->kq, changes, kqop->nchanges,
+ events, kqop->nevents, ts_p);
+ kqop->nchanges = 0;
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("kevent");
+ return (-1);
+ }
+
+ return (0);
+ }
+
+ event_debug(("%s: kevent reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int which = 0;
+
+ if (events[i].flags & EV_ERROR) {
+ /*
+ * Error messages that can happen, when a delete fails.
+ * EBADF happens when the file discriptor has been
+ * closed,
+ * ENOENT when the file discriptor was closed and
+ * then reopened.
+ * EINVAL for some reasons not understood; EINVAL
+ * should not be returned ever; but FreeBSD does :-\
+ * An error is also indicated when a callback deletes
+ * an event we are still processing. In that case
+ * the data field is set to ENOENT.
+ */
+ if (events[i].data == EBADF ||
+ events[i].data == EINVAL ||
+ events[i].data == ENOENT)
+ continue;
+ errno = events[i].data;
+ return (-1);
+ }
+
+ /* Translate the kqueue filter into libevent's event mask. */
+ if (events[i].filter == EVFILT_READ) {
+ which |= EV_READ;
+ } else if (events[i].filter == EVFILT_WRITE) {
+ which |= EV_WRITE;
+ } else if (events[i].filter == EVFILT_SIGNAL) {
+ which |= EV_SIGNAL;
+ }
+
+ if (!which)
+ continue;
+
+ if (events[i].filter == EVFILT_SIGNAL) {
+ /* udata holds the per-signal event list; data is the
+ * number of times the signal fired. */
+ struct event_list *head =
+ (struct event_list *)events[i].udata;
+ TAILQ_FOREACH(ev, head, ev_signal_next) {
+ event_active(ev, which, events[i].data);
+ }
+ } else {
+ ev = (struct event *)events[i].udata;
+
+ /* Non-persistent events were added EV_ONESHOT, so the
+ * kernel has already forgotten them. */
+ if (!(ev->ev_events & EV_PERSIST))
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+
+ event_active(ev, which, 1);
+ }
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register an event with the kernel queue. Signal events are installed
+ * immediately (and share one kernel registration per signal number);
+ * read/write events are batched via kq_insert until the next dispatch.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+kq_add(void *arg, struct event *ev)
+{
+ struct kqop *kqop = arg;
+ struct kevent kev;
+
+ if (ev->ev_events & EV_SIGNAL) {
+ int nsignal = EVENT_SIGNAL(ev);
+
+ assert(nsignal >= 0 && nsignal < NSIG);
+ /* Only the first event for a signal registers with the
+ * kernel; later ones just join the per-signal list. */
+ if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
+ struct timespec timeout = { 0, 0 };
+
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = nsignal;
+ kev.filter = EVFILT_SIGNAL;
+ kev.flags = EV_ADD;
+ kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]);
+
+ /* Be ready for the signal if it is sent any
+ * time between now and the next call to
+ * kq_dispatch. */
+ if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
+ return (-1);
+
+ if (_evsignal_set_handler(ev->ev_base, nsignal,
+ kq_sighandler) == -1)
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
+ ev_signal_next);
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ return (0);
+ }
+
+ if (ev->ev_events & EV_READ) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_READ;
+#ifdef NOTE_EOF
+ /* Make it behave like select() and poll() */
+ kev.fflags = NOTE_EOF;
+#endif
+ kev.flags = EV_ADD;
+ /* One-shot unless the event is persistent. */
+ if (!(ev->ev_events & EV_PERSIST))
+ kev.flags |= EV_ONESHOT;
+ kev.udata = PTR_TO_UDATA(ev);
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_WRITE;
+ kev.flags = EV_ADD;
+ if (!(ev->ev_events & EV_PERSIST))
+ kev.flags |= EV_ONESHOT;
+ kev.udata = PTR_TO_UDATA(ev);
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ }
+
+ return (0);
+}
+
+/*
+ * Unregister an event. Signal deletions go to the kernel immediately
+ * (mirroring kq_add); read/write deletions are batched via kq_insert.
+ * Events never handed to the kernel are skipped. Returns 0 / -1.
+ */
+static int
+kq_del(void *arg, struct event *ev)
+{
+ struct kqop *kqop = arg;
+ struct kevent kev;
+
+ if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
+ return (0);
+
+ if (ev->ev_events & EV_SIGNAL) {
+ int nsignal = EVENT_SIGNAL(ev);
+ struct timespec timeout = { 0, 0 };
+
+ assert(nsignal >= 0 && nsignal < NSIG);
+ TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
+ /* Only drop the kernel registration once the last event for
+ * this signal is gone. */
+ if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = nsignal;
+ kev.filter = EVFILT_SIGNAL;
+ kev.flags = EV_DELETE;
+
+ /* Because we insert signal events
+ * immediately, we need to delete them
+ * immediately, too */
+ if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
+ return (-1);
+
+ if (_evsignal_restore_handler(ev->ev_base,
+ nsignal) == -1)
+ return (-1);
+ }
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ return (0);
+ }
+
+ if (ev->ev_events & EV_READ) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_READ;
+ kev.flags = EV_DELETE;
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_WRITE;
+ kev.flags = EV_DELETE;
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ }
+
+ return (0);
+}
+
+/*
+ * Release the kqueue backend state. free(NULL) is a no-op, so the
+ * original's 'if (ptr) free(ptr)' guards are dropped. The kqueue fd is
+ * only closed by the process that created it (kqueue fds are not
+ * inherited across fork, and a child must not close the parent's).
+ * The struct is scrubbed before free to make use-after-free obvious.
+ */
+static void
+kq_dealloc(struct event_base *base, void *arg)
+{
+ struct kqop *kqop = arg;
+
+ free(kqop->changes);
+ free(kqop->events);
+ if (kqop->kq >= 0 && kqop->pid == getpid())
+ close(kqop->kq);
+ memset(kqop, 0, sizeof(struct kqop));
+ free(kqop);
+}
diff --git a/libevent/log.c b/libevent/log.c
new file mode 100644
index 00000000000..b62a61915d1
--- /dev/null
+++ b/libevent/log.c
@@ -0,0 +1,187 @@
+/* $OpenBSD: err.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * log.c
+ *
+ * Based on err.c, which was adapted from OpenBSD libc *err* *warn* code.
+ *
+ * Copyright (c) 2005 Nick Mathewson <nickm@freehaven.net>
+ *
+ * Copyright (c) 2000 Dug Song <dugsong@monkey.org>
+ *
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include "event.h"
+
+#include "log.h"
+#include "evutil.h"
+
+static void _warn_helper(int severity, int log_errno, const char *fmt,
+ va_list ap);
+static void event_log(int severity, const char *msg);
+
+/* Log at ERR severity with the current errno appended, then exit(eval). */
+void
+event_err(int eval, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_ERR, errno, fmt, ap);
+ va_end(ap);
+ exit(eval);
+}
+
+/* Log at WARN severity with the current errno appended. */
+void
+event_warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_WARN, errno, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at ERR severity without errno (log_errno = -1), then exit(eval). */
+void
+event_errx(int eval, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_ERR, -1, fmt, ap);
+ va_end(ap);
+ exit(eval);
+}
+
+/* Log at WARN severity without errno. */
+void
+event_warnx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_WARN, -1, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at MSG (informational) severity without errno. */
+void
+event_msgx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_MSG, -1, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at DEBUG severity; normally reached via the event_debug() macro. */
+void
+_event_debugx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_DEBUG, -1, fmt, ap);
+ va_end(ap);
+}
+
+/*
+ * Format the message into a fixed buffer, append ": strerror(errno)"
+ * when log_errno >= 0 and space permits, and hand the result to
+ * event_log(). Messages longer than the buffer are truncated.
+ */
+static void
+_warn_helper(int severity, int log_errno, const char *fmt, va_list ap)
+{
+ char buf[1024];
+ size_t len;
+
+ if (fmt != NULL)
+ evutil_vsnprintf(buf, sizeof(buf), fmt, ap);
+ else
+ buf[0] = '\0';
+
+ if (log_errno >= 0) {
+ len = strlen(buf);
+ /* Need room for at least ": " plus one character. */
+ if (len < sizeof(buf) - 3) {
+ evutil_snprintf(buf + len, sizeof(buf) - len, ": %s",
+ strerror(log_errno));
+ }
+ }
+
+ event_log(severity, buf);
+}
+
+/* User-installed log sink; NULL means "write to stderr" (see event_log). */
+static event_log_cb log_fn = NULL;
+
+/* Redirect all library logging to 'cb'; pass NULL to restore stderr. */
+void
+event_set_log_callback(event_log_cb cb)
+{
+ log_fn = cb;
+}
+
+/*
+ * Deliver one log line: hand it to the user-installed callback when
+ * one is set, otherwise print "[severity] msg" to stderr.
+ */
+static void
+event_log(int severity, const char *msg)
+{
+ const char *severity_str;
+
+ if (log_fn != NULL) {
+ log_fn(severity, msg);
+ return;
+ }
+
+ if (severity == _EVENT_LOG_DEBUG)
+ severity_str = "debug";
+ else if (severity == _EVENT_LOG_MSG)
+ severity_str = "msg";
+ else if (severity == _EVENT_LOG_WARN)
+ severity_str = "warn";
+ else if (severity == _EVENT_LOG_ERR)
+ severity_str = "err";
+ else
+ severity_str = "???";
+
+ (void)fprintf(stderr, "[%s] %s\n", severity_str, msg);
+}
diff --git a/libevent/log.h b/libevent/log.h
new file mode 100644
index 00000000000..7bc6632b8dd
--- /dev/null
+++ b/libevent/log.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _LOG_H_
+#define _LOG_H_
+
+#ifdef __GNUC__
+#define EV_CHECK_FMT(a,b) __attribute__((format(printf, a, b)))
+#else
+#define EV_CHECK_FMT(a,b)
+#endif
+
+void event_err(int eval, const char *fmt, ...) EV_CHECK_FMT(2,3);
+void event_warn(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void event_errx(int eval, const char *fmt, ...) EV_CHECK_FMT(2,3);
+void event_warnx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void event_msgx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void _event_debugx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+
+#ifdef USE_DEBUG
+#define event_debug(x) _event_debugx x
+#else
+#define event_debug(x) do {;} while (0)
+#endif
+
+#undef EV_CHECK_FMT
+
+#endif
diff --git a/libevent/min_heap.h b/libevent/min_heap.h
new file mode 100644
index 00000000000..edaa5ae1270
--- /dev/null
+++ b/libevent/min_heap.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2006 Maxim Yegorushkin <maxim.yegorushkin@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MIN_HEAP_H_
+#define _MIN_HEAP_H_
+
+#include "event.h"
+#include "evutil.h"
+#include "stdlib.h"
+
+typedef struct min_heap
+{
+ struct event** p;
+ unsigned n, a;
+} min_heap_t;
+
+static inline void min_heap_ctor(min_heap_t* s);
+static inline void min_heap_dtor(min_heap_t* s);
+static inline void min_heap_elem_init(struct event* e);
+static inline int min_heap_elem_greater(struct event *a, struct event *b);
+static inline int min_heap_empty(min_heap_t* s);
+static inline unsigned min_heap_size(min_heap_t* s);
+static inline struct event* min_heap_top(min_heap_t* s);
+static inline int min_heap_reserve(min_heap_t* s, unsigned n);
+static inline int min_heap_push(min_heap_t* s, struct event* e);
+static inline struct event* min_heap_pop(min_heap_t* s);
+static inline int min_heap_erase(min_heap_t* s, struct event* e);
+static inline void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, struct event* e);
+static inline void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, struct event* e);
+
+/* Heap ordering predicate: true when a's timeout fires after b's,
+ * making this a min-heap on expiry time. */
+int min_heap_elem_greater(struct event *a, struct event *b)
+{
+ return evutil_timercmp(&a->ev_timeout, &b->ev_timeout, >);
+}
+
+/* Trivial accessors; min_heap_idx == -1 marks "not in any heap". */
+void min_heap_ctor(min_heap_t* s) { s->p = 0; s->n = 0; s->a = 0; }
+void min_heap_dtor(min_heap_t* s) { free(s->p); }
+void min_heap_elem_init(struct event* e) { e->min_heap_idx = -1; }
+int min_heap_empty(min_heap_t* s) { return 0u == s->n; }
+unsigned min_heap_size(min_heap_t* s) { return s->n; }
+struct event* min_heap_top(min_heap_t* s) { return s->n ? *s->p : 0; }
+
+/* Insert 'e'; returns 0 on success, -1 if growing the array fails. */
+int min_heap_push(min_heap_t* s, struct event* e)
+{
+ if(min_heap_reserve(s, s->n + 1))
+ return -1;
+ min_heap_shift_up_(s, s->n++, e);
+ return 0;
+}
+
+/* Remove and return the event with the earliest timeout, or 0 if empty. */
+struct event* min_heap_pop(min_heap_t* s)
+{
+ if(s->n)
+ {
+ struct event* e = *s->p;
+ /* Re-seat the last element from the vacated root downward. */
+ min_heap_shift_down_(s, 0u, s->p[--s->n]);
+ e->min_heap_idx = -1;
+ return e;
+ }
+ return 0;
+}
+
+/* Remove an arbitrary element; returns 0, or -1 if 'e' is not in the
+ * heap (min_heap_idx == -1). */
+int min_heap_erase(min_heap_t* s, struct event* e)
+{
+ if(((unsigned int)-1) != e->min_heap_idx)
+ {
+ struct event *last = s->p[--s->n];
+ unsigned parent = (e->min_heap_idx - 1) / 2;
+ /* we replace e with the last element in the heap. We might need to
+ shift it upward if it is less than its parent, or downward if it is
+ greater than one or both its children. Since the children are known
+ to be less than the parent, it can't need to shift both up and
+ down. */
+ if (e->min_heap_idx > 0 && min_heap_elem_greater(s->p[parent], last))
+ min_heap_shift_up_(s, e->min_heap_idx, last);
+ else
+ min_heap_shift_down_(s, e->min_heap_idx, last);
+ e->min_heap_idx = -1;
+ return 0;
+ }
+ return -1;
+}
+
+/* Ensure capacity for at least n elements (doubling growth, min 8);
+ * returns 0 on success, -1 on allocation failure (heap unchanged). */
+int min_heap_reserve(min_heap_t* s, unsigned n)
+{
+ if(s->a < n)
+ {
+ struct event** p;
+ unsigned a = s->a ? s->a * 2 : 8;
+ if(a < n)
+ a = n;
+ if(!(p = (struct event**)realloc(s->p, a * sizeof *p)))
+ return -1;
+ s->p = p;
+ s->a = a;
+ }
+ return 0;
+}
+
+/* Sift 'e' toward the root starting from the hole at hole_index,
+ * updating each displaced element's cached index. */
+void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, struct event* e)
+{
+ unsigned parent = (hole_index - 1) / 2;
+ while(hole_index && min_heap_elem_greater(s->p[parent], e))
+ {
+ (s->p[hole_index] = s->p[parent])->min_heap_idx = hole_index;
+ hole_index = parent;
+ parent = (hole_index - 1) / 2;
+ }
+ (s->p[hole_index] = e)->min_heap_idx = hole_index;
+}
+
+/* Sift 'e' toward the leaves from hole_index, always descending into
+ * the smaller child, then finish with a shift-up to place 'e'. */
+void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, struct event* e)
+{
+ unsigned min_child = 2 * (hole_index + 1);
+ while(min_child <= s->n)
+ {
+ /* Step back to the left child when it is the smaller (or only) one. */
+ min_child -= min_child == s->n || min_heap_elem_greater(s->p[min_child], s->p[min_child - 1]);
+ if(!(min_heap_elem_greater(e, s->p[min_child])))
+ break;
+ (s->p[hole_index] = s->p[min_child])->min_heap_idx = hole_index;
+ hole_index = min_child;
+ min_child = 2 * (hole_index + 1);
+ }
+ min_heap_shift_up_(s, hole_index, e);
+}
+
+#endif /* _MIN_HEAP_H_ */
diff --git a/libevent/poll.c b/libevent/poll.c
new file mode 100644
index 00000000000..5d496618d29
--- /dev/null
+++ b/libevent/poll.c
@@ -0,0 +1,379 @@
+/* $OpenBSD: poll.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
/* Per-base private state for the poll(2) backend. */
struct pollop {
	int event_count;		/* Highest number alloc */
	int nfds;                       /* Size of event_* */
	int fd_count;                   /* Size of idxplus1_by_fd */
	struct pollfd *event_set;       /* array handed to poll(2) */
	struct event **event_r_back;    /* event watching fd for read, by slot */
	struct event **event_w_back;    /* event watching fd for write, by slot */
	int *idxplus1_by_fd; /* Index into event_set by fd; we add 1 so
			      * that 0 (which is easy to memset) can mean
			      * "no entry." */
};

static void *poll_init	(struct event_base *);
static int poll_add		(void *, struct event *);
static int poll_del		(void *, struct event *);
static int poll_dispatch	(struct event_base *, void *, struct timeval *);
static void poll_dealloc	(struct event_base *, void *);

/* Backend vtable registered with the event core (see event-internal.h). */
const struct eventop pollops = {
	"poll",
	poll_init,
	poll_add,
	poll_del,
	poll_dispatch,
	poll_dealloc,
	0   /* does not require reinitialization after fork */
};
+
+static void *
+poll_init(struct event_base *base)
+{
+ struct pollop *pollop;
+
+ /* Disable poll when this environment variable is set */
+ if (getenv("EVENT_NOPOLL"))
+ return (NULL);
+
+ if (!(pollop = calloc(1, sizeof(struct pollop))))
+ return (NULL);
+
+ evsignal_init(base);
+
+ return (pollop);
+}
+
#ifdef CHECK_INVARIANTS
/* Debug-only consistency check: every fd->slot mapping must point at a
 * pollfd for that fd, the back-pointer arrays must agree with the
 * POLLIN/POLLOUT bits, and every slot must map back to its fd. */
static void
poll_check_ok(struct pollop *pop)
{
	int i, idx;
	struct event *ev;

	for (i = 0; i < pop->fd_count; ++i) {
		idx = pop->idxplus1_by_fd[i]-1;
		if (idx < 0)
			continue;   /* fd 'i' has no entry */
		assert(pop->event_set[idx].fd == i);
		if (pop->event_set[idx].events & POLLIN) {
			ev = pop->event_r_back[idx];
			assert(ev);
			assert(ev->ev_events & EV_READ);
			assert(ev->ev_fd == i);
		}
		if (pop->event_set[idx].events & POLLOUT) {
			ev = pop->event_w_back[idx];
			assert(ev);
			assert(ev->ev_events & EV_WRITE);
			assert(ev->ev_fd == i);
		}
	}
	/* Reverse direction: each occupied slot maps back through the index. */
	for (i = 0; i < pop->nfds; ++i) {
		struct pollfd *pfd = &pop->event_set[i];
		assert(pop->idxplus1_by_fd[pfd->fd] == i+1);
	}
}
#else
#define poll_check_ok(pop)
#endif
+
/* Run one poll(2) round: wait up to 'tv' (NULL = forever) for activity,
 * dispatch caught signals, then activate the events whose fds fired.
 * Returns 0 on success, -1 on a non-EINTR poll failure. */
static int
poll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	int res, i, j, msec = -1, nfds;
	struct pollop *pop = arg;

	poll_check_ok(pop);

	/* Convert the timeval to whole milliseconds, rounding up so we
	 * never wake before the deadline. */
	if (tv != NULL)
		msec = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;

	nfds = pop->nfds;
	res = poll(pop->event_set, nfds, msec);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("poll");
			return (-1);
		}

		/* EINTR: a signal handler probably ran; process signals
		 * and let the caller loop again. */
		evsignal_process(base);
		return (0);
	} else if (base->sig.evsignal_caught) {
		evsignal_process(base);
	}

	event_debug(("%s: poll reports %d", __func__, res));

	if (res == 0 || nfds == 0)
		return (0);

	/* Start the scan at a random slot so that, under constant load,
	 * low-numbered fds can't starve high-numbered ones. */
	i = random() % nfds;
	for (j = 0; j < nfds; j++) {
		struct event *r_ev = NULL, *w_ev = NULL;
		int what;
		if (++i == nfds)
			i = 0;
		what = pop->event_set[i].revents;

		if (!what)
			continue;

		res = 0;

		/* If the file gets closed notify */
		if (what & (POLLHUP|POLLERR))
			what |= POLLIN|POLLOUT;
		if (what & POLLIN) {
			res |= EV_READ;
			r_ev = pop->event_r_back[i];
		}
		if (what & POLLOUT) {
			res |= EV_WRITE;
			w_ev = pop->event_w_back[i];
		}
		if (res == 0)
			continue;

		/* Only activate an event for conditions it asked for; the
		 * same event may be both reader and writer, so guard
		 * against activating it twice. */
		if (r_ev && (res & r_ev->ev_events)) {
			event_active(r_ev, res & r_ev->ev_events, 1);
		}
		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
			event_active(w_ev, res & w_ev->ev_events, 1);
		}
	}

	return (0);
}
+
/* Register 'ev' with the poll backend.  Signal events are delegated to
 * evsignal_add(); read/write events get (or reuse) a pollfd slot for
 * their fd.  Returns 0 on success, -1 on allocation failure. */
static int
poll_add(void *arg, struct event *ev)
{
	struct pollop *pop = arg;
	struct pollfd *pfd = NULL;
	int i;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_add(ev));
	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
		return (0);

	poll_check_ok(pop);
	/* Grow the pollfd array and both back-pointer arrays in lockstep
	 * (doubling, starting at 32).  Each realloc commits its pointer
	 * immediately so a later failure never leaves a dangling pointer;
	 * a partially-grown trio merely over-allocates, which is harmless. */
	if (pop->nfds + 1 >= pop->event_count) {
		struct pollfd *tmp_event_set;
		struct event **tmp_event_r_back;
		struct event **tmp_event_w_back;
		int tmp_event_count;

		if (pop->event_count < 32)
			tmp_event_count = 32;
		else
			tmp_event_count = pop->event_count * 2;

		/* We need more file descriptors */
		tmp_event_set = realloc(pop->event_set,
				 tmp_event_count * sizeof(struct pollfd));
		if (tmp_event_set == NULL) {
			event_warn("realloc");
			return (-1);
		}
		pop->event_set = tmp_event_set;

		tmp_event_r_back = realloc(pop->event_r_back,
			    tmp_event_count * sizeof(struct event *));
		if (tmp_event_r_back == NULL) {
			/* event_set overallocated; that's okay. */
			event_warn("realloc");
			return (-1);
		}
		pop->event_r_back = tmp_event_r_back;

		tmp_event_w_back = realloc(pop->event_w_back,
			    tmp_event_count * sizeof(struct event *));
		if (tmp_event_w_back == NULL) {
			/* event_set and event_r_back overallocated; that's
			 * okay. */
			event_warn("realloc");
			return (-1);
		}
		pop->event_w_back = tmp_event_w_back;

		pop->event_count = tmp_event_count;
	}
	/* Grow the fd -> slot index so it can be indexed by ev_fd; the new
	 * tail is zeroed, which encodes "no entry" (index-plus-one == 0). */
	if (ev->ev_fd >= pop->fd_count) {
		int *tmp_idxplus1_by_fd;
		int new_count;
		if (pop->fd_count < 32)
			new_count = 32;
		else
			new_count = pop->fd_count * 2;
		while (new_count <= ev->ev_fd)
			new_count *= 2;
		tmp_idxplus1_by_fd =
			realloc(pop->idxplus1_by_fd, new_count * sizeof(int));
		if (tmp_idxplus1_by_fd == NULL) {
			event_warn("realloc");
			return (-1);
		}
		pop->idxplus1_by_fd = tmp_idxplus1_by_fd;
		memset(pop->idxplus1_by_fd + pop->fd_count,
		    0, sizeof(int)*(new_count - pop->fd_count));
		pop->fd_count = new_count;
	}

	/* Reuse the fd's existing slot (so one fd watched for both read
	 * and write occupies a single pollfd), or claim a fresh one. */
	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
	if (i >= 0) {
		pfd = &pop->event_set[i];
	} else {
		i = pop->nfds++;
		pfd = &pop->event_set[i];
		pfd->events = 0;
		pfd->fd = ev->ev_fd;
		pop->event_w_back[i] = pop->event_r_back[i] = NULL;
		pop->idxplus1_by_fd[ev->ev_fd] = i + 1;
	}

	pfd->revents = 0;
	if (ev->ev_events & EV_WRITE) {
		pfd->events |= POLLOUT;
		pop->event_w_back[i] = ev;
	}
	if (ev->ev_events & EV_READ) {
		pfd->events |= POLLIN;
		pop->event_r_back[i] = ev;
	}
	poll_check_ok(pop);

	return (0);
}
+
+/*
+ * Nothing to be done here.
+ */
+
/* Unregister 'ev' from the poll backend.  Clears the corresponding
 * POLLIN/POLLOUT bits; when no interest remains on the fd, the slot is
 * recycled by moving the last pollfd into it (swap-with-last delete).
 * Returns 0 on success, -1 if the fd has no slot. */
static int
poll_del(void *arg, struct event *ev)
{
	struct pollop *pop = arg;
	struct pollfd *pfd = NULL;
	int i;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_del(ev));

	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
		return (0);

	poll_check_ok(pop);
	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
	if (i < 0)
		return (-1);

	/* Do we still want to read or write? */
	pfd = &pop->event_set[i];
	if (ev->ev_events & EV_READ) {
		pfd->events &= ~POLLIN;
		pop->event_r_back[i] = NULL;
	}
	if (ev->ev_events & EV_WRITE) {
		pfd->events &= ~POLLOUT;
		pop->event_w_back[i] = NULL;
	}
	poll_check_ok(pop);
	if (pfd->events)
		/* Another event cares about that fd. */
		return (0);

	/* Okay, so we aren't interested in that fd anymore. */
	pop->idxplus1_by_fd[ev->ev_fd] = 0;

	--pop->nfds;
	if (i != pop->nfds) {
		/*
		 * Shift the last pollfd down into the now-unoccupied
		 * position.
		 */
		memcpy(&pop->event_set[i], &pop->event_set[pop->nfds],
		       sizeof(struct pollfd));
		pop->event_r_back[i] = pop->event_r_back[pop->nfds];
		pop->event_w_back[i] = pop->event_w_back[pop->nfds];
		/* Re-point the moved fd's index entry at its new slot. */
		pop->idxplus1_by_fd[pop->event_set[i].fd] = i + 1;
	}

	poll_check_ok(pop);
	return (0);
}
+
+static void
+poll_dealloc(struct event_base *base, void *arg)
+{
+ struct pollop *pop = arg;
+
+ evsignal_dealloc(base);
+ if (pop->event_set)
+ free(pop->event_set);
+ if (pop->event_r_back)
+ free(pop->event_r_back);
+ if (pop->event_w_back)
+ free(pop->event_w_back);
+ if (pop->idxplus1_by_fd)
+ free(pop->idxplus1_by_fd);
+
+ memset(pop, 0, sizeof(struct pollop));
+ free(pop);
+}
diff --git a/libevent/sample/Makefile.am b/libevent/sample/Makefile.am
new file mode 100644
index 00000000000..2f4e26e2f3f
--- /dev/null
+++ b/libevent/sample/Makefile.am
@@ -0,0 +1,14 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+LDADD = ../libevent.la
+AM_CFLAGS = -I$(top_srcdir) -I$(top_srcdir)/compat
+
+noinst_PROGRAMS = event-test time-test signal-test
+
event_test_SOURCES = event-test.c
time_test_SOURCES = time-test.c
signal_test_SOURCES = signal-test.c
+
+verify:
+
+DISTCLEANFILES = *~
diff --git a/libevent/sample/event-test.c b/libevent/sample/event-test.c
new file mode 100644
index 00000000000..2c6cb93864c
--- /dev/null
+++ b/libevent/sample/event-test.c
@@ -0,0 +1,139 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o event-test event-test.c -L/usr/local/lib -levent
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#include <sys/time.h>
+#else
+#include <windows.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+
/* Read callback for the FIFO: re-arm the (non-persistent) event, read a
 * chunk from 'fd', and echo it to stdout.  'arg' is the struct event
 * itself so the callback can re-add/delete it. */
static void
fifo_read(int fd, short event, void *arg)
{
	char buf[255];
	int len;
	struct event *ev = arg;
#ifdef WIN32
	DWORD dwBytesRead;
#endif

	/* Reschedule this event */
	event_add(ev, NULL);

	fprintf(stderr, "fifo_read called with fd: %d, event: %d, arg: %p\n",
		fd, event, arg);
#ifdef WIN32
	/* On win32 'fd' is really a HANDLE smuggled through an int. */
	len = ReadFile((HANDLE)fd, buf, sizeof(buf) - 1, &dwBytesRead, NULL);

	// Check for end of file.
	if(len && dwBytesRead == 0) {
		fprintf(stderr, "End Of File");
		event_del(ev);
		return;
	}

	buf[dwBytesRead] = '\0';
#else
	/* sizeof(buf) - 1 leaves room for the terminating NUL below. */
	len = read(fd, buf, sizeof(buf) - 1);

	if (len == -1) {
		perror("read");
		return;
	} else if (len == 0) {
		fprintf(stderr, "Connection closed\n");
		return;
	}

	buf[len] = '\0';
#endif
	fprintf(stdout, "Read: %s\n", buf);
}
+
/* Sample program: create a FIFO (or open test.txt on win32), watch it
 * for readability with libevent, and print whatever arrives. */
int
main (int argc, char **argv)
{
	struct event evfifo;
#ifdef WIN32
	HANDLE socket;
	// Open a file.
	socket = CreateFile("test.txt",     // open File
			GENERIC_READ,                 // open for reading
			0,                            // do not share
			NULL,                         // no security
			OPEN_EXISTING,                // existing file only
			FILE_ATTRIBUTE_NORMAL,        // normal file
			NULL);                        // no attr. template

	if(socket == INVALID_HANDLE_VALUE)
		return 1;

#else
	struct stat st;
	const char *fifo = "event.fifo";
	int socket;

	/* Refuse to clobber a regular file that happens to have our name. */
	if (lstat (fifo, &st) == 0) {
		if ((st.st_mode & S_IFMT) == S_IFREG) {
			errno = EEXIST;
			perror("lstat");
			exit (1);
		}
	}

	unlink (fifo);
	if (mkfifo (fifo, 0600) == -1) {
		perror("mkfifo");
		exit (1);
	}

	/* Linux pipes are broken, we need O_RDWR instead of O_RDONLY */
#ifdef __linux
	socket = open (fifo, O_RDWR | O_NONBLOCK, 0);
#else
	socket = open (fifo, O_RDONLY | O_NONBLOCK, 0);
#endif

	if (socket == -1) {
		perror("open");
		exit (1);
	}

	fprintf(stderr, "Write data to %s\n", fifo);
#endif
	/* Initalize the event library */
	event_init();

	/* Initalize one event */
#ifdef WIN32
	/* NOTE(review): casting a HANDLE to int truncates on 64-bit
	 * Windows — sample code only. */
	event_set(&evfifo, (int)socket, EV_READ, fifo_read, &evfifo);
#else
	event_set(&evfifo, socket, EV_READ, fifo_read, &evfifo);
#endif

	/* Add it to the active events, without a timeout */
	event_add(&evfifo, NULL);

	/* Loop forever dispatching events. */
	event_dispatch();
#ifdef WIN32
	CloseHandle(socket);
#endif
	return (0);
}
+
diff --git a/libevent/sample/signal-test.c b/libevent/sample/signal-test.c
new file mode 100644
index 00000000000..9a131cb50c2
--- /dev/null
+++ b/libevent/sample/signal-test.c
@@ -0,0 +1,63 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o signal-test \
+ * signal-test.c -L/usr/local/lib -levent
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#include <sys/time.h>
+#else
+#include <windows.h>
+#endif
+#include <signal.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+
/* Number of times the SIGINT handler callback has fired. */
int called = 0;

/* Signal callback: print which signal arrived; after it has fired a few
 * times, remove the persistent event so event_dispatch() can return. */
static void
signal_cb(int fd, short event, void *arg)
{
	struct event *signal = arg;

	printf("%s: got signal %d\n", __func__, EVENT_SIGNAL(signal));

	/* Delete on the third delivery (called is 0, 1, 2 before this). */
	if (called >= 2)
		event_del(signal);

	called++;
}
+
/* Sample program: catch SIGINT with a persistent libevent signal event;
 * exits after the callback deletes the event. */
int
main (int argc, char **argv)
{
	struct event signal_int;

	/* Initalize the event library */
	event_init();

	/* Initalize one event */
	event_set(&signal_int, SIGINT, EV_SIGNAL|EV_PERSIST, signal_cb,
	    &signal_int);

	event_add(&signal_int, NULL);

	/* Runs until signal_cb calls event_del(). */
	event_dispatch();

	return (0);
}
+
diff --git a/libevent/sample/time-test.c b/libevent/sample/time-test.c
new file mode 100644
index 00000000000..069d4f8f783
--- /dev/null
+++ b/libevent/sample/time-test.c
@@ -0,0 +1,70 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#endif
+#include <time.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
/* Wall-clock time of the previous callback invocation.
 * NOTE(review): declared int, so it truncates time_t on LP64 — harmless
 * for a sample since only the difference is printed. */
int lasttime;

/* Timer callback: report seconds elapsed since the last firing, then
 * re-arm the (non-persistent) timeout for another 2 seconds. */
static void
timeout_cb(int fd, short event, void *arg)
{
	struct timeval tv;
	struct event *timeout = arg;
	int newtime = time(NULL);

	printf("%s: called at %d: %d\n", __func__, newtime,
	    newtime - lasttime);
	lasttime = newtime;

	evutil_timerclear(&tv);
	tv.tv_sec = 2;
	event_add(timeout, &tv);
}
+
/* Sample program: fire a libevent timer every 2 seconds, forever. */
int
main (int argc, char **argv)
{
	struct event timeout;
	struct timeval tv;

	/* Initalize the event library */
	event_init();

	/* Initalize one event */
	evtimer_set(&timeout, timeout_cb, &timeout);

	evutil_timerclear(&tv);
	tv.tv_sec = 2;
	event_add(&timeout, &tv);

	lasttime = time(NULL);

	/* Never returns: timeout_cb keeps re-adding the timer. */
	event_dispatch();

	return (0);
}
+
diff --git a/libevent/select.c b/libevent/select.c
new file mode 100644
index 00000000000..ca6639fd829
--- /dev/null
+++ b/libevent/select.c
@@ -0,0 +1,356 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <sys/queue.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
#ifndef howmany
/* Ceiling division: how many 'y'-sized units cover 'x'. */
#define howmany(x, y) (((x)+((y)-1))/(y))
#endif

/* Per-base private state for the select(2) backend.  The *_in sets hold
 * registered interest; they are copied to the *_out sets before each
 * select() call, because select() mutates its arguments. */
struct selectop {
	int event_fds;		/* Highest fd in fd set */
	int event_fdsz;         /* byte size of each fd_set allocation */
	fd_set *event_readset_in;
	fd_set *event_writeset_in;
	fd_set *event_readset_out;
	fd_set *event_writeset_out;
	struct event **event_r_by_fd;   /* event watching fd for read */
	struct event **event_w_by_fd;   /* event watching fd for write */
};

static void *select_init	(struct event_base *);
static int select_add		(void *, struct event *);
static int select_del		(void *, struct event *);
static int select_dispatch	(struct event_base *, void *, struct timeval *);
static void select_dealloc	(struct event_base *, void *);

/* Backend vtable registered with the event core (see event-internal.h). */
const struct eventop selectops = {
	"select",
	select_init,
	select_add,
	select_del,
	select_dispatch,
	select_dealloc,
	0   /* does not require reinitialization after fork */
};
+
+static void *
+select_init(struct event_base *base)
+{
+ struct selectop *sop;
+
+ /* Disable select when this environment variable is set */
+ if (getenv("EVENT_NOSELECT"))
+ return (NULL);
+
+ if (!(sop = calloc(1, sizeof(struct selectop))))
+ return (NULL);
+
+ select_resize(sop, howmany(32 + 1, NFDBITS)*sizeof(fd_mask));
+
+ evsignal_init(base);
+
+ return (sop);
+}
+
#ifdef CHECK_INVARIANTS
/* Debug-only consistency check: for every fd up to the highest in use,
 * membership in the *_in sets must agree exactly with the back-pointer
 * arrays, and each back-pointed event must reference this fd. */
static void
check_selectop(struct selectop *sop)
{
	int i;
	for (i = 0; i <= sop->event_fds; ++i) {
		if (FD_ISSET(i, sop->event_readset_in)) {
			assert(sop->event_r_by_fd[i]);
			assert(sop->event_r_by_fd[i]->ev_events & EV_READ);
			assert(sop->event_r_by_fd[i]->ev_fd == i);
		} else {
			assert(! sop->event_r_by_fd[i]);
		}
		if (FD_ISSET(i, sop->event_writeset_in)) {
			assert(sop->event_w_by_fd[i]);
			assert(sop->event_w_by_fd[i]->ev_events & EV_WRITE);
			assert(sop->event_w_by_fd[i]->ev_fd == i);
		} else {
			assert(! sop->event_w_by_fd[i]);
		}
	}

}
#else
#define check_selectop(sop) do { (void) sop; } while (0)
#endif
+
/* Run one select(2) round: copy the interest sets (select mutates its
 * arguments), wait up to 'tv', dispatch caught signals, then activate
 * events for ready fds.  Returns 0 on success, -1 on a non-EINTR
 * select failure. */
static int
select_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	int res, i, j;
	struct selectop *sop = arg;

	check_selectop(sop);

	memcpy(sop->event_readset_out, sop->event_readset_in,
	       sop->event_fdsz);
	memcpy(sop->event_writeset_out, sop->event_writeset_in,
	       sop->event_fdsz);

	res = select(sop->event_fds + 1, sop->event_readset_out,
	    sop->event_writeset_out, NULL, tv);

	check_selectop(sop);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("select");
			return (-1);
		}

		/* EINTR: a signal probably arrived; process and retry. */
		evsignal_process(base);
		return (0);
	} else if (base->sig.evsignal_caught) {
		evsignal_process(base);
	}

	event_debug(("%s: select reports %d", __func__, res));

	check_selectop(sop);
	/* Start the scan at a random fd so low-numbered fds can't starve
	 * high-numbered ones under constant load. */
	i = random() % (sop->event_fds+1);
	for (j = 0; j <= sop->event_fds; ++j) {
		struct event *r_ev = NULL, *w_ev = NULL;
		if (++i >= sop->event_fds+1)
			i = 0;

		res = 0;
		if (FD_ISSET(i, sop->event_readset_out)) {
			r_ev = sop->event_r_by_fd[i];
			res |= EV_READ;
		}
		if (FD_ISSET(i, sop->event_writeset_out)) {
			w_ev = sop->event_w_by_fd[i];
			res |= EV_WRITE;
		}
		/* One event may be both reader and writer on this fd;
		 * activate it at most once. */
		if (r_ev && (res & r_ev->ev_events)) {
			event_active(r_ev, res & r_ev->ev_events, 1);
		}
		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
			event_active(w_ev, res & w_ev->ev_events, 1);
		}
	}
	check_selectop(sop);

	return (0);
}
+
+
/* Grow all four fd_sets to 'fdsz' bytes and both back-pointer arrays to
 * the matching number of fds, zeroing the newly added tails.  Each
 * successful realloc commits its pointer into 'sop' immediately, so a
 * later failure leaves no dangling pointer and no leak — merely some
 * over-allocated members.  Returns 0 on success, -1 on failure. */
static int
select_resize(struct selectop *sop, int fdsz)
{
	int n_events, n_events_old;

	fd_set *readset_in = NULL;
	fd_set *writeset_in = NULL;
	fd_set *readset_out = NULL;
	fd_set *writeset_out = NULL;
	struct event **r_by_fd = NULL;
	struct event **w_by_fd = NULL;

	/* Number of fds representable in fdsz bytes (before and after). */
	n_events = (fdsz/sizeof(fd_mask)) * NFDBITS;
	n_events_old = (sop->event_fdsz/sizeof(fd_mask)) * NFDBITS;

	if (sop->event_readset_in)
		check_selectop(sop);

	if ((readset_in = realloc(sop->event_readset_in, fdsz)) == NULL)
		goto error;
	sop->event_readset_in = readset_in;
	if ((readset_out = realloc(sop->event_readset_out, fdsz)) == NULL)
		goto error;
	sop->event_readset_out = readset_out;
	if ((writeset_in = realloc(sop->event_writeset_in, fdsz)) == NULL)
		goto error;
	sop->event_writeset_in = writeset_in;
	if ((writeset_out = realloc(sop->event_writeset_out, fdsz)) == NULL)
		goto error;
	sop->event_writeset_out = writeset_out;
	if ((r_by_fd = realloc(sop->event_r_by_fd,
		n_events*sizeof(struct event*))) == NULL)
		goto error;
	sop->event_r_by_fd = r_by_fd;
	if ((w_by_fd = realloc(sop->event_w_by_fd,
		n_events * sizeof(struct event*))) == NULL)
		goto error;
	sop->event_w_by_fd = w_by_fd;

	/* Zero only the newly added tails; realloc preserves the old
	 * contents but leaves the growth region uninitialized.  The *_out
	 * sets need no clearing — dispatch overwrites them wholesale. */
	memset((char *)sop->event_readset_in + sop->event_fdsz, 0,
	    fdsz - sop->event_fdsz);
	memset((char *)sop->event_writeset_in + sop->event_fdsz, 0,
	    fdsz - sop->event_fdsz);
	memset(sop->event_r_by_fd + n_events_old, 0,
	    (n_events-n_events_old) * sizeof(struct event*));
	memset(sop->event_w_by_fd + n_events_old, 0,
	    (n_events-n_events_old) * sizeof(struct event*));

	sop->event_fdsz = fdsz;
	check_selectop(sop);

	return (0);

 error:
	event_warn("malloc");
	return (-1);
}
+
+
/* Register 'ev' with the select backend.  Signal events are delegated
 * to evsignal_add(); read/write interest sets the fd's bit in the
 * appropriate *_in set, growing the fd_sets first if the fd exceeds the
 * current capacity.  Returns 0 on success, -1 on allocation failure. */
static int
select_add(void *arg, struct event *ev)
{
	struct selectop *sop = arg;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_add(ev));

	check_selectop(sop);
	/*
	 * Keep track of the highest fd, so that we can calculate the size
	 * of the fd_sets for select(2)
	 */
	if (sop->event_fds < ev->ev_fd) {
		int fdsz = sop->event_fdsz;

		if (fdsz < sizeof(fd_mask))
			fdsz = sizeof(fd_mask);

		/* Double the byte size until ev_fd fits. */
		while (fdsz <
		    (howmany(ev->ev_fd + 1, NFDBITS) * sizeof(fd_mask)))
			fdsz *= 2;

		if (fdsz != sop->event_fdsz) {
			if (select_resize(sop, fdsz)) {
				check_selectop(sop);
				return (-1);
			}
		}

		sop->event_fds = ev->ev_fd;
	}

	if (ev->ev_events & EV_READ) {
		FD_SET(ev->ev_fd, sop->event_readset_in);
		sop->event_r_by_fd[ev->ev_fd] = ev;
	}
	if (ev->ev_events & EV_WRITE) {
		FD_SET(ev->ev_fd, sop->event_writeset_in);
		sop->event_w_by_fd[ev->ev_fd] = ev;
	}
	check_selectop(sop);

	return (0);
}
+
+/*
+ * Nothing to be done here.
+ */
+
/* Unregister 'ev' from the select backend: clear its fd's bit in the
 * relevant *_in set(s) and the matching back pointer.  An fd beyond the
 * tracked maximum was never registered here, so that case is a no-op.
 * Returns 0, or the result of evsignal_del() for signal events. */
static int
select_del(void *arg, struct event *ev)
{
	struct selectop *sop = arg;

	check_selectop(sop);
	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_del(ev));

	if (sop->event_fds < ev->ev_fd) {
		check_selectop(sop);
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		FD_CLR(ev->ev_fd, sop->event_readset_in);
		sop->event_r_by_fd[ev->ev_fd] = NULL;
	}

	if (ev->ev_events & EV_WRITE) {
		FD_CLR(ev->ev_fd, sop->event_writeset_in);
		sop->event_w_by_fd[ev->ev_fd] = NULL;
	}

	check_selectop(sop);
	return (0);
}
+
+static void
+select_dealloc(struct event_base *base, void *arg)
+{
+ struct selectop *sop = arg;
+
+ evsignal_dealloc(base);
+ if (sop->event_readset_in)
+ free(sop->event_readset_in);
+ if (sop->event_writeset_in)
+ free(sop->event_writeset_in);
+ if (sop->event_readset_out)
+ free(sop->event_readset_out);
+ if (sop->event_writeset_out)
+ free(sop->event_writeset_out);
+ if (sop->event_r_by_fd)
+ free(sop->event_r_by_fd);
+ if (sop->event_w_by_fd)
+ free(sop->event_w_by_fd);
+
+ memset(sop, 0, sizeof(struct selectop));
+ free(sop);
+}
diff --git a/libevent/signal.c b/libevent/signal.c
new file mode 100644
index 00000000000..74fa23f688a
--- /dev/null
+++ b/libevent/signal.c
@@ -0,0 +1,357 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#include <assert.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "evutil.h"
+#include "log.h"
+
/* The base whose loop should be woken by the C signal handler.  A plain
 * global because a signal handler has no other way to find its base;
 * only one base at a time can therefore receive signals. */
struct event_base *evsignal_base = NULL;

static void evsignal_handler(int sig);

/* Callback for when the signal handler write a byte to our signaling socket */
static void
evsignal_cb(int fd, short what, void *arg)
{
	static char signals[1];
#ifdef WIN32
	SSIZE_T n;
#else
	ssize_t n;
#endif

	/* Drain one wakeup byte; the payload is meaningless, only the
	 * wakeup matters.  The actual signal bookkeeping happens in
	 * evsignal_process(). */
	n = recv(fd, signals, sizeof(signals), 0);
	if (n == -1)
		event_err(1, "%s: read", __func__);
}
+
/* Set the close-on-exec flag on fd 'x' so the signal sockets do not
 * leak into child processes; compiles to nothing without fcntl F_SETFD. */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do { \
        if (fcntl(x, F_SETFD, 1) == -1) \
                event_warn("fcntl(%d, F_SETFD)", x); \
} while (0)
#else
#define FD_CLOSEONEXEC(x)
#endif
+
/* Initialize per-base signal state: create the self-pipe socketpair the
 * signal handler writes to, zero the bookkeeping arrays, and prepare
 * (but do not yet add) the internal read event on the pair.
 * Returns 0 on success, -1 on failure (win32 only; on other platforms
 * event_err() terminates the process before the return is reached). */
int
evsignal_init(struct event_base *base)
{
	int i;

	/*
	 * Our signal handler is going to write to one end of the socket
	 * pair to wake up our event loop.  The event loop then scans for
	 * signals that got delivered.
	 */
	if (evutil_socketpair(
		    AF_UNIX, SOCK_STREAM, 0, base->sig.ev_signal_pair) == -1) {
#ifdef WIN32
		/* Make this nonfatal on win32, where sometimes people
		   have localhost firewalled. */
		event_warn("%s: socketpair", __func__);
#else
		event_err(1, "%s: socketpair", __func__);
#endif
		return -1;
	}

	FD_CLOSEONEXEC(base->sig.ev_signal_pair[0]);
	FD_CLOSEONEXEC(base->sig.ev_signal_pair[1]);
	base->sig.sh_old = NULL;
	base->sig.sh_old_max = 0;
	base->sig.evsignal_caught = 0;
	memset(&base->sig.evsigcaught, 0, sizeof(sig_atomic_t)*NSIG);
	/* initialize the queues for all events */
	for (i = 0; i < NSIG; ++i)
		TAILQ_INIT(&base->sig.evsigevents[i]);

	/* [0] is the write end used inside the handler; it must never
	 * block there, hence nonblocking. */
	evutil_make_socket_nonblocking(base->sig.ev_signal_pair[0]);

	event_set(&base->sig.ev_signal, base->sig.ev_signal_pair[1],
		EV_READ | EV_PERSIST, evsignal_cb, &base->sig.ev_signal);
	base->sig.ev_signal.ev_base = base;
	/* Internal events are not counted as user events by the loop. */
	base->sig.ev_signal.ev_flags |= EVLIST_INTERNAL;

	return 0;
}
+
/* Helper: set the signal handler for evsignal to handler in base, so that
 * we can restore the original handler when we clear the current one.
 * The previous handler (sigaction or plain handler, depending on
 * HAVE_SIGACTION) is heap-saved in sig->sh_old[evsignal].
 * Returns 0 on success, -1 on allocation or syscall failure. */
int
_evsignal_set_handler(struct event_base *base,
		      int evsignal, void (*handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction sa;
#else
	ev_sighandler_t sh;
#endif
	struct evsignal_info *sig = &base->sig;
	void *p;

	/*
	 * resize saved signal handler array up to the highest signal number.
	 * a dynamic array is used to keep footprint on the low side.
	 */
	if (evsignal >= sig->sh_old_max) {
		int new_max = evsignal + 1;
		event_debug(("%s: evsignal (%d) >= sh_old_max (%d), resizing",
			    __func__, evsignal, sig->sh_old_max));
		p = realloc(sig->sh_old, new_max * sizeof(*sig->sh_old));
		if (p == NULL) {
			event_warn("realloc");
			return (-1);
		}

		/* Zero the new tail so unset slots read as NULL. */
		memset((char *)p + sig->sh_old_max * sizeof(*sig->sh_old),
		    0, (new_max - sig->sh_old_max) * sizeof(*sig->sh_old));

		sig->sh_old_max = new_max;
		sig->sh_old = p;
	}

	/* allocate space for previous handler out of dynamic array */
	sig->sh_old[evsignal] = malloc(sizeof *sig->sh_old[evsignal]);
	if (sig->sh_old[evsignal] == NULL) {
		event_warn("malloc");
		return (-1);
	}

	/* save previous handler and setup new handler */
#ifdef HAVE_SIGACTION
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handler;
	/* SA_RESTART: don't make unrelated syscalls fail with EINTR. */
	sa.sa_flags |= SA_RESTART;
	/* Block all signals while the handler runs. */
	sigfillset(&sa.sa_mask);

	if (sigaction(evsignal, &sa, sig->sh_old[evsignal]) == -1) {
		event_warn("sigaction");
		free(sig->sh_old[evsignal]);
		return (-1);
	}
#else
	if ((sh = signal(evsignal, handler)) == SIG_ERR) {
		event_warn("signal");
		free(sig->sh_old[evsignal]);
		return (-1);
	}
	*sig->sh_old[evsignal] = sh;
#endif

	return (0);
}
+
/* Register a signal event: install our handler for its signal the first
 * time that signal is watched, add the internal self-pipe read event on
 * first use, and append 'ev' to the signal's listener queue.
 * Returns 0 on success, -1 on failure. */
int
evsignal_add(struct event *ev)
{
	int evsignal;
	struct event_base *base = ev->ev_base;
	struct evsignal_info *sig = &ev->ev_base->sig;

	/* A signal event's ev_fd field holds the signal number, so it
	 * cannot double as a read/write event. */
	if (ev->ev_events & (EV_READ|EV_WRITE))
		event_errx(1, "%s: EV_SIGNAL incompatible use", __func__);
	evsignal = EVENT_SIGNAL(ev);
	assert(evsignal >= 0 && evsignal < NSIG);
	if (TAILQ_EMPTY(&sig->evsigevents[evsignal])) {
		event_debug(("%s: %p: changing signal handler", __func__, ev));
		if (_evsignal_set_handler(
			    base, evsignal, evsignal_handler) == -1)
			return (-1);

		/* catch signals if they happen quickly */
		evsignal_base = base;

		if (!sig->ev_signal_added) {
			if (event_add(&sig->ev_signal, NULL))
				return (-1);
			sig->ev_signal_added = 1;
		}
	}

	/* multiple events may listen to the same signal */
	TAILQ_INSERT_TAIL(&sig->evsigevents[evsignal], ev, ev_signal_next);

	return (0);
}
+
/* Reinstall the handler that was saved by _evsignal_set_handler() for
 * 'evsignal', and free the saved copy.  Returns 0 on success, -1 if the
 * sigaction()/signal() call fails. */
int
_evsignal_restore_handler(struct event_base *base, int evsignal)
{
	int ret = 0;
	struct evsignal_info *sig = &base->sig;
#ifdef HAVE_SIGACTION
	struct sigaction *sh;
#else
	ev_sighandler_t *sh;
#endif

	/* restore previous handler */
	sh = sig->sh_old[evsignal];
	sig->sh_old[evsignal] = NULL;
#ifdef HAVE_SIGACTION
	if (sigaction(evsignal, sh, NULL) == -1) {
		event_warn("sigaction");
		ret = -1;
	}
#else
	if (signal(evsignal, *sh) == SIG_ERR) {
		event_warn("signal");
		ret = -1;
	}
#endif
	/* Saved handler was heap-allocated by _evsignal_set_handler(). */
	free(sh);

	return ret;
}
+
/* Unregister a signal event from its signal's listener queue.  Only
 * when the queue becomes empty is the original (pre-libevent) signal
 * handler restored.  Returns 0, or -1 if restoring fails. */
int
evsignal_del(struct event *ev)
{
	struct event_base *base = ev->ev_base;
	struct evsignal_info *sig = &base->sig;
	int evsignal = EVENT_SIGNAL(ev);

	assert(evsignal >= 0 && evsignal < NSIG);

	/* multiple events may listen to the same signal */
	TAILQ_REMOVE(&sig->evsigevents[evsignal], ev, ev_signal_next);

	if (!TAILQ_EMPTY(&sig->evsigevents[evsignal]))
		return (0);

	event_debug(("%s: %p: restoring signal handler", __func__, ev));

	return (_evsignal_restore_handler(ev->ev_base, EVENT_SIGNAL(ev)));
}
+
/* The C signal handler installed for every watched signal.  Runs in
 * async-signal context, so it only bumps counters, sets a flag, and
 * writes one byte to the self-pipe to wake the event loop; errno is
 * saved and restored so the interrupted code is unaffected. */
static void
evsignal_handler(int sig)
{
	int save_errno = errno;

	if (evsignal_base == NULL) {
		event_warn(
			"%s: received signal %d, but have no base configured",
			__func__, sig);
		return;
	}

	evsignal_base->sig.evsigcaught[sig]++;
	evsignal_base->sig.evsignal_caught = 1;

#ifndef HAVE_SIGACTION
	/* Old-style signal() may reset the handler to SIG_DFL after each
	 * delivery; re-arm it. */
	signal(sig, evsignal_handler);
#endif

	/* Wake up our notification mechanism */
	send(evsignal_base->sig.ev_signal_pair[0], "a", 1, 0);
	errno = save_errno;
}
+
/* Called from the event loop after evsignal_caught was set: for every
 * signal with pending deliveries, activate all listening events with
 * the delivery count, removing non-persistent ones first. */
void
evsignal_process(struct event_base *base)
{
	struct evsignal_info *sig = &base->sig;
	struct event *ev, *next_ev;
	sig_atomic_t ncalls;
	int i;

	base->sig.evsignal_caught = 0;
	for (i = 1; i < NSIG; ++i) {
		ncalls = sig->evsigcaught[i];
		if (ncalls == 0)
			continue;
		/* Subtract (rather than zero) so deliveries racing in
		 * from the handler during this loop are not lost. */
		sig->evsigcaught[i] -= ncalls;

		/* Save the next pointer first: event_del() unlinks 'ev'
		 * from this queue. */
		for (ev = TAILQ_FIRST(&sig->evsigevents[i]);
		    ev != NULL; ev = next_ev) {
			next_ev = TAILQ_NEXT(ev, ev_signal_next);
			if (!(ev->ev_events & EV_PERSIST))
				event_del(ev);
			event_active(ev, EV_SIGNAL, ncalls);
		}

	}
}
+
/* Tear down per-base signal state: drop the internal self-pipe event,
 * restore every original signal handler still saved, close both ends of
 * the socketpair, and free the saved-handler array. */
void
evsignal_dealloc(struct event_base *base)
{
	int i = 0;
	if (base->sig.ev_signal_added) {
		event_del(&base->sig.ev_signal);
		base->sig.ev_signal_added = 0;
	}
	for (i = 0; i < NSIG; ++i) {
		if (i < base->sig.sh_old_max && base->sig.sh_old[i] != NULL)
			_evsignal_restore_handler(base, i);
	}

	EVUTIL_CLOSESOCKET(base->sig.ev_signal_pair[0]);
	base->sig.ev_signal_pair[0] = -1;
	EVUTIL_CLOSESOCKET(base->sig.ev_signal_pair[1]);
	base->sig.ev_signal_pair[1] = -1;
	base->sig.sh_old_max = 0;

	/* per index frees are handled in evsignal_del() */
	free(base->sig.sh_old);
}
diff --git a/libevent/strlcpy-internal.h b/libevent/strlcpy-internal.h
new file mode 100644
index 00000000000..22b5f61d45e
--- /dev/null
+++ b/libevent/strlcpy-internal.h
@@ -0,0 +1,23 @@
+#ifndef _STRLCPY_INTERNAL_H_
+#define _STRLCPY_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_STRLCPY
+#include <string.h>
+size_t _event_strlcpy(char *dst, const char *src, size_t siz);
+#define strlcpy _event_strlcpy
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/libevent/strlcpy.c b/libevent/strlcpy.c
new file mode 100644
index 00000000000..5d194527c8c
--- /dev/null
+++ b/libevent/strlcpy.c
@@ -0,0 +1,76 @@
+/* $OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $ */
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char *rcsid = "$OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_STRLCPY
+#include "strlcpy-internal.h"
+
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+/*
+ * K&R-style definition preserved from the OpenBSD original; the ANSI
+ * prototype lives in strlcpy-internal.h.  Copies at most siz-1 bytes,
+ * always NUL-terminates when siz > 0, and returns strlen(src) so the
+ * caller can detect truncation (retval >= siz).
+ */
+size_t
+_event_strlcpy(dst, src, siz)
+	char *dst;
+	const char *src;
+	size_t siz;
+{
+	register char *d = dst;
+	register const char *s = src;
+	register size_t n = siz;
+
+	/* Copy as many bytes as will fit */
+	if (n != 0 && --n != 0) {
+		do {
+			if ((*d++ = *s++) == 0)
+				break;
+		} while (--n != 0);
+	}
+
+	/* Not enough room in dst, add NUL and traverse rest of src */
+	if (n == 0) {
+		if (siz != 0)
+			*d = '\0';		/* NUL-terminate dst */
+		while (*s++)
+			;	/* advance s to the end so the length is right */
+	}
+
+	return(s - src - 1);	/* count does not include NUL */
+}
+#endif
diff --git a/libevent/test/Makefile.am b/libevent/test/Makefile.am
new file mode 100644
index 00000000000..3558d02fd5a
--- /dev/null
+++ b/libevent/test/Makefile.am
@@ -0,0 +1,35 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+AM_CFLAGS = -I$(top_srcdir) -I$(top_srcdir)/compat
+
+EXTRA_DIST = regress.rpc regress.gen.h regress.gen.c
+
+noinst_PROGRAMS = test-init test-eof test-weof test-time regress bench
+
+BUILT_SOURCES = regress.gen.c regress.gen.h
+test_init_SOURCES = test-init.c
+test_init_LDADD = ../libevent_core.la
+test_eof_SOURCES = test-eof.c
+test_eof_LDADD = ../libevent_core.la
+test_weof_SOURCES = test-weof.c
+test_weof_LDADD = ../libevent_core.la
+test_time_SOURCES = test-time.c
+test_time_LDADD = ../libevent_core.la
+regress_SOURCES = regress.c regress.h regress_http.c regress_dns.c \
+ regress_rpc.c \
+ regress.gen.c regress.gen.h
+regress_LDADD = ../libevent.la
+bench_SOURCES = bench.c
+bench_LDADD = ../libevent.la
+
+regress.gen.c regress.gen.h: regress.rpc $(top_srcdir)/event_rpcgen.py
+ $(top_srcdir)/event_rpcgen.py $(srcdir)/regress.rpc || echo "No Python installed"
+
+DISTCLEANFILES = *~
+
+test: test-init test-eof test-weof test-time regress
+
+verify: test
+ @$(srcdir)/test.sh
+
+bench test-init test-eof test-weof test-time: ../libevent.la
diff --git a/libevent/test/bench.c b/libevent/test/bench.c
new file mode 100644
index 00000000000..c976932fa80
--- /dev/null
+++ b/libevent/test/bench.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Mon 03/10/2003 - Modified by Davide Libenzi <davidel@xmailserver.org>
+ *
+ * Added chain event propagation to improve the sensitivity of
+ * the measurement with respect to the event loop efficiency.
+ *
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/socket.h>
+#include <signal.h>
+#include <sys/resource.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+
+static int count, writes, fired;
+static int *pipes;
+static int num_pipes, num_active, num_writes;
+static struct event *events;
+
+/*
+ * Benchmark read callback: consume one byte from 'fd' and, while
+ * writes remain in this round, forward one byte to the next pipe so
+ * activity chains through the event loop.
+ *
+ * NOTE(review): a failed read() would add -1 to 'count'; harmless for
+ * the benchmark but worth knowing when interpreting results.
+ */
+static void
+read_cb(int fd, short which, void *arg)
+{
+	long idx = (long) arg, widx = idx + 1;	/* arg carries the pipe index */
+	u_char ch;
+
+	count += read(fd, &ch, sizeof(ch));
+	if (writes) {
+		if (widx >= num_pipes)
+			widx -= num_pipes;	/* wrap around to pipe 0 */
+		write(pipes[2 * widx + 1], "e", 1);
+		writes--;
+		fired++;
+	}
+}
+
+/*
+ * One benchmark iteration: (re)register a persistent read event per
+ * pipe, fire 'num_active' initial writes spread evenly across the
+ * pipes, then spin the loop until every expected byte has been read.
+ * Returns a pointer to a static timeval holding the elapsed time of
+ * the dispatch phase (static so the address remains valid after
+ * return).
+ */
+static struct timeval *
+run_once(void)
+{
+	int *cp, space;
+	long i;
+	static struct timeval ts, te;
+
+	for (cp = pipes, i = 0; i < num_pipes; i++, cp += 2) {
+		event_del(&events[i]);
+		event_set(&events[i], cp[0], EV_READ | EV_PERSIST, read_cb, (void *) i);
+		event_add(&events[i], NULL);
+	}
+
+	/* Flush any pending events before timing starts. */
+	event_loop(EVLOOP_ONCE | EVLOOP_NONBLOCK);
+
+	fired = 0;
+	space = num_pipes / num_active;
+	space = space * 2;	/* pipes[] holds 2 fds per pipe */
+	for (i = 0; i < num_active; i++, fired++)
+		write(pipes[i * space + 1], "e", 1);
+
+	count = 0;
+	writes = num_writes;
+	{ int xcount = 0;	/* number of loop passes, for diagnostics */
+	gettimeofday(&ts, NULL);
+	do {
+		event_loop(EVLOOP_ONCE | EVLOOP_NONBLOCK);
+		xcount++;
+	} while (count != fired);
+	gettimeofday(&te, NULL);
+
+	if (xcount != count) fprintf(stderr, "Xcount: %d, Rcount: %d\n", xcount, count);
+	}
+
+	evutil_timersub(&te, &ts, &te);
+
+	return (&te);
+}
+
+/*
+ * Benchmark driver.  Options: -n <pipes>, -a <active initial writers>,
+ * -w <total chained writes>.  Raises RLIMIT_NOFILE to fit all pipes,
+ * creates the pipe/socketpair fds, then runs 25 timed iterations,
+ * printing the elapsed microseconds of each.
+ */
+int
+main (int argc, char **argv)
+{
+#ifndef WIN32
+	struct rlimit rl;
+#endif
+	int i, c;
+	struct timeval *tv;
+	int *cp;
+
+	num_pipes = 100;
+	num_active = 1;
+	num_writes = num_pipes;
+	while ((c = getopt(argc, argv, "n:a:w:")) != -1) {
+		switch (c) {
+		case 'n':
+			num_pipes = atoi(optarg);
+			break;
+		case 'a':
+			num_active = atoi(optarg);
+			break;
+		case 'w':
+			num_writes = atoi(optarg);
+			break;
+		default:
+			fprintf(stderr, "Illegal argument \"%c\"\n", c);
+			exit(1);
+		}
+	}
+
+#ifndef WIN32
+	/* Two fds per pipe plus headroom for stdio/internal sockets. */
+	rl.rlim_cur = rl.rlim_max = num_pipes * 2 + 50;
+	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
+		perror("setrlimit");
+		exit(1);
+	}
+#endif
+
+	events = calloc(num_pipes, sizeof(struct event));
+	pipes = calloc(num_pipes * 2, sizeof(int));
+	if (events == NULL || pipes == NULL) {
+		perror("malloc");
+		exit(1);
+	}
+
+	event_init();
+
+	for (cp = pipes, i = 0; i < num_pipes; i++, cp += 2) {
+#ifdef USE_PIPES
+		if (pipe(cp) == -1) {
+#else
+		/* socketpair is the portable default (works on win32 too) */
+		if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, cp) == -1) {
+#endif
+			perror("pipe");
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < 25; i++) {
+		tv = run_once();
+		if (tv == NULL)
+			exit(1);
+		fprintf(stdout, "%ld\n",
+			tv->tv_sec * 1000000L + tv->tv_usec);
+	}
+
+	exit(0);
+}
diff --git a/libevent/test/regress.c b/libevent/test/regress.c
new file mode 100644
index 00000000000..0b7517d3aa4
--- /dev/null
+++ b/libevent/test/regress.c
@@ -0,0 +1,1703 @@
+/*
+ * Copyright (c) 2003, 2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <assert.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evutil.h"
+#include "event-internal.h"
+#include "log.h"
+
+#include "regress.h"
+#ifndef WIN32
+#include "regress.gen.h"
+#endif
+
+int pair[2];
+int test_ok;
+static int called;
+static char wbuf[4096];
+static char rbuf[4096];
+static int woff;
+static int roff;
+static int usepersist;
+static struct timeval tset;
+static struct timeval tcalled;
+static struct event_base *global_base;
+
+#define TEST1 "this is a test"
+#define SECONDS 1
+
+#ifndef SHUT_WR
+#define SHUT_WR 1
+#endif
+
+#ifdef WIN32
+#define write(fd,buf,len) send((fd),(buf),(len),0)
+#define read(fd,buf,len) recv((fd),(buf),(len),0)
+#endif
+
+/*
+ * Read callback shared by several tests: reads into a scratch buffer,
+ * re-adds itself (the event is passed back via 'arg') after the first
+ * call, and sets test_ok when EOF arrives on the second call.  A NULL
+ * arg turns the callback into a counter-only no-op (used by
+ * test_registerfds).
+ */
+static void
+simple_read_cb(int fd, short event, void *arg)
+{
+	char buf[256];
+	int len;
+
+	if (arg == NULL)
+		return;
+
+	len = read(fd, buf, sizeof(buf));
+
+	if (len) {
+		if (!called) {
+			if (event_add(arg, NULL) == -1)
+				exit(1);
+		}
+	} else if (called == 1)	/* len == 0: EOF after exactly one read */
+		test_ok = 1;
+
+	called++;
+}
+
+/*
+ * Write callback: send one TEST1 payload (including its NUL) and
+ * record success or failure in test_ok.  A NULL arg is a no-op.
+ */
+static void
+simple_write_cb(int fd, short event, void *arg)
+{
+	int len;
+
+	if (arg == NULL)
+		return;
+
+	len = write(fd, TEST1, strlen(TEST1) + 1);
+	if (len == -1)
+		test_ok = 0;
+	else
+		test_ok = 1;
+}
+
+static void
+multiple_write_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+ int len;
+
+ len = 128;
+ if (woff + len >= sizeof(wbuf))
+ len = sizeof(wbuf) - woff;
+
+ len = write(fd, wbuf + woff, len);
+ if (len == -1) {
+ fprintf(stderr, "%s: write\n", __func__);
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ woff += len;
+
+ if (woff >= sizeof(wbuf)) {
+ shutdown(fd, SHUT_WR);
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ if (!usepersist) {
+ if (event_add(ev, NULL) == -1)
+ exit(1);
+ }
+}
+
+static void
+multiple_read_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+ int len;
+
+ len = read(fd, rbuf + roff, sizeof(rbuf) - roff);
+ if (len == -1)
+ fprintf(stderr, "%s: read\n", __func__);
+ if (len <= 0) {
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ roff += len;
+ if (!usepersist) {
+ if (event_add(ev, NULL) == -1)
+ exit(1);
+ }
+}
+
+static void
+timeout_cb(int fd, short event, void *arg)
+{
+ struct timeval tv;
+ int diff;
+
+ evutil_gettimeofday(&tcalled, NULL);
+ if (evutil_timercmp(&tcalled, &tset, >))
+ evutil_timersub(&tcalled, &tset, &tv);
+ else
+ evutil_timersub(&tset, &tcalled, &tv);
+
+ diff = tv.tv_sec*1000 + tv.tv_usec/1000 - SECONDS * 1000;
+ if (diff < 0)
+ diff = -diff;
+
+ if (diff < 100)
+ test_ok = 1;
+}
+
+#ifndef WIN32
+static void
+signal_cb_sa(int sig)
+{
+ test_ok = 2;
+}
+
+static void
+signal_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+
+ signal_del(ev);
+ test_ok = 1;
+}
+#endif
+
+struct both {
+ struct event ev;
+ int nread;
+};
+
+static void
+combined_read_cb(int fd, short event, void *arg)
+{
+ struct both *both = arg;
+ char buf[128];
+ int len;
+
+ len = read(fd, buf, sizeof(buf));
+ if (len == -1)
+ fprintf(stderr, "%s: read\n", __func__);
+ if (len <= 0)
+ return;
+
+ both->nread += len;
+ if (event_add(&both->ev, NULL) == -1)
+ exit(1);
+}
+
+static void
+combined_write_cb(int fd, short event, void *arg)
+{
+ struct both *both = arg;
+ char buf[128];
+ int len;
+
+ len = sizeof(buf);
+ if (len > both->nread)
+ len = both->nread;
+
+ len = write(fd, buf, len);
+ if (len == -1)
+ fprintf(stderr, "%s: write\n", __func__);
+ if (len <= 0) {
+ shutdown(fd, SHUT_WR);
+ return;
+ }
+
+ both->nread -= len;
+ if (event_add(&both->ev, NULL) == -1)
+ exit(1);
+}
+
+/* Test infrastructure */
+
+/*
+ * Common test prologue: print the test name, create a connected
+ * socketpair in the global pair[] (non-blocking where fcntl exists),
+ * and reset the test_ok/called globals.  Exits on socketpair failure.
+ */
+static int
+setup_test(const char *name)
+{
+
+	fprintf(stdout, "%s", name);
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) {
+		fprintf(stderr, "%s: socketpair\n", __func__);
+		exit(1);
+	}
+
+#ifdef HAVE_FCNTL
+	/* NOTE(review): fcntl failure is reported but not fatal, and the
+	 * message lacks a trailing newline. */
+	if (fcntl(pair[0], F_SETFL, O_NONBLOCK) == -1)
+		fprintf(stderr, "fcntl(O_NONBLOCK)");
+
+	if (fcntl(pair[1], F_SETFL, O_NONBLOCK) == -1)
+		fprintf(stderr, "fcntl(O_NONBLOCK)");
+#endif
+
+	test_ok = 0;
+	called = 0;
+	return (0);
+}
+
+/*
+ * Common test epilogue: close the socketpair, print OK or FAILED based
+ * on test_ok, and exit(1) on failure so the harness stops at the first
+ * broken test.
+ */
+static int
+cleanup_test(void)
+{
+#ifndef WIN32
+	close(pair[0]);
+	close(pair[1]);
+#else
+	CloseHandle((HANDLE)pair[0]);
+	CloseHandle((HANDLE)pair[1]);
+#endif
+	if (test_ok)
+		fprintf(stdout, "OK\n");
+	else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+	test_ok = 0;
+	return (0);
+}
+
+static void
+test_registerfds(void)
+{
+ int i, j;
+ int pair[2];
+ struct event read_evs[512];
+ struct event write_evs[512];
+
+ struct event_base *base = event_base_new();
+
+ fprintf(stdout, "Testing register fds: ");
+
+ for (i = 0; i < 512; ++i) {
+ if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) {
+ /* run up to the limit of file descriptors */
+ break;
+ }
+ event_set(&read_evs[i], pair[0],
+ EV_READ|EV_PERSIST, simple_read_cb, NULL);
+ event_base_set(base, &read_evs[i]);
+ event_add(&read_evs[i], NULL);
+ event_set(&write_evs[i], pair[1],
+ EV_WRITE|EV_PERSIST, simple_write_cb, NULL);
+ event_base_set(base, &write_evs[i]);
+ event_add(&write_evs[i], NULL);
+
+ /* just loop once */
+ event_base_loop(base, EVLOOP_ONCE);
+ }
+
+ /* now delete everything */
+ for (j = 0; j < i; ++j) {
+ event_del(&read_evs[j]);
+ event_del(&write_evs[j]);
+#ifndef WIN32
+ close(read_evs[j].ev_fd);
+ close(write_evs[j].ev_fd);
+#else
+ CloseHandle((HANDLE)read_evs[j].ev_fd);
+ CloseHandle((HANDLE)write_evs[j].ev_fd);
+#endif
+
+ /* just loop once */
+ event_base_loop(base, EVLOOP_ONCE);
+ }
+
+ event_base_free(base);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+test_simpleread(void)
+{
+ struct event ev;
+
+ /* Very simple read test */
+ setup_test("Simple read: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ event_set(&ev, pair[1], EV_READ, simple_read_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+static void
+test_simplewrite(void)
+{
+ struct event ev;
+
+ /* Very simple write test */
+ setup_test("Simple write: ");
+
+ event_set(&ev, pair[0], EV_WRITE, simple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+static void
+test_multiple(void)
+{
+ struct event ev, ev2;
+ int i;
+
+ /* Multiple read and write test */
+ setup_test("Multiple read/write: ");
+ memset(rbuf, 0, sizeof(rbuf));
+ for (i = 0; i < sizeof(wbuf); i++)
+ wbuf[i] = i;
+
+ roff = woff = 0;
+ usepersist = 0;
+
+ event_set(&ev, pair[0], EV_WRITE, multiple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_set(&ev2, pair[1], EV_READ, multiple_read_cb, &ev2);
+ if (event_add(&ev2, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ if (roff == woff)
+ test_ok = memcmp(rbuf, wbuf, sizeof(wbuf)) == 0;
+
+ cleanup_test();
+}
+
+static void
+test_persistent(void)
+{
+ struct event ev, ev2;
+ int i;
+
+ /* Multiple read and write test with persist */
+ setup_test("Persist read/write: ");
+ memset(rbuf, 0, sizeof(rbuf));
+ for (i = 0; i < sizeof(wbuf); i++)
+ wbuf[i] = i;
+
+ roff = woff = 0;
+ usepersist = 1;
+
+ event_set(&ev, pair[0], EV_WRITE|EV_PERSIST, multiple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_set(&ev2, pair[1], EV_READ|EV_PERSIST, multiple_read_cb, &ev2);
+ if (event_add(&ev2, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ if (roff == woff)
+ test_ok = memcmp(rbuf, wbuf, sizeof(wbuf)) == 0;
+
+ cleanup_test();
+}
+
+static void
+test_combined(void)
+{
+ struct both r1, r2, w1, w2;
+
+ setup_test("Combined read/write: ");
+ memset(&r1, 0, sizeof(r1));
+ memset(&r2, 0, sizeof(r2));
+ memset(&w1, 0, sizeof(w1));
+ memset(&w2, 0, sizeof(w2));
+
+ w1.nread = 4096;
+ w2.nread = 8192;
+
+ event_set(&r1.ev, pair[0], EV_READ, combined_read_cb, &r1);
+ event_set(&w1.ev, pair[0], EV_WRITE, combined_write_cb, &w1);
+ event_set(&r2.ev, pair[1], EV_READ, combined_read_cb, &r2);
+ event_set(&w2.ev, pair[1], EV_WRITE, combined_write_cb, &w2);
+ if (event_add(&r1.ev, NULL) == -1)
+ exit(1);
+ if (event_add(&w1.ev, NULL))
+ exit(1);
+ if (event_add(&r2.ev, NULL))
+ exit(1);
+ if (event_add(&w2.ev, NULL))
+ exit(1);
+
+ event_dispatch();
+
+ if (r1.nread == 8192 && r2.nread == 4096)
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+test_simpletimeout(void)
+{
+ struct timeval tv;
+ struct event ev;
+
+ setup_test("Simple timeout: ");
+
+ tv.tv_usec = 0;
+ tv.tv_sec = SECONDS;
+ evtimer_set(&ev, timeout_cb, NULL);
+ evtimer_add(&ev, &tv);
+
+ evutil_gettimeofday(&tset, NULL);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+#ifndef WIN32
+extern struct event_base *current_base;
+
+static void
+child_signal_cb(int fd, short event, void *arg)
+{
+ struct timeval tv;
+ int *pint = arg;
+
+ *pint = 1;
+
+ tv.tv_usec = 500000;
+ tv.tv_sec = 0;
+ event_loopexit(&tv);
+}
+
+static void
+test_fork(void)
+{
+ int status, got_sigchld = 0;
+ struct event ev, sig_ev;
+ pid_t pid;
+
+ setup_test("After fork: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ event_set(&ev, pair[1], EV_READ, simple_read_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+
+ signal_set(&sig_ev, SIGCHLD, child_signal_cb, &got_sigchld);
+ signal_add(&sig_ev, NULL);
+
+ if ((pid = fork()) == 0) {
+ /* in the child */
+ if (event_reinit(current_base) == -1) {
+ fprintf(stderr, "FAILED (reinit)\n");
+ exit(1);
+ }
+
+ signal_del(&sig_ev);
+
+ called = 0;
+
+ event_dispatch();
+
+ /* we do not send an EOF; simple_read_cb requires an EOF
+ * to set test_ok. we just verify that the callback was
+ * called. */
+ exit(test_ok != 0 || called != 2 ? -2 : 76);
+ }
+
+ /* wait for the child to read the data */
+ sleep(1);
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ if (waitpid(pid, &status, 0) == -1) {
+ fprintf(stderr, "FAILED (fork)\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 76) {
+ fprintf(stderr, "FAILED (exit): %d\n", WEXITSTATUS(status));
+ exit(1);
+ }
+
+ /* test that the current event loop still works */
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ event_dispatch();
+
+ if (!got_sigchld) {
+ fprintf(stdout, "FAILED (sigchld)\n");
+ exit(1);
+ }
+
+ signal_del(&sig_ev);
+
+ cleanup_test();
+}
+
+static void
+test_simplesignal(void)
+{
+ struct event ev;
+ struct itimerval itv;
+
+ setup_test("Simple signal: ");
+ signal_set(&ev, SIGALRM, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ /* find bugs in which operations are re-ordered */
+ signal_del(&ev);
+ signal_add(&ev, NULL);
+
+ memset(&itv, 0, sizeof(itv));
+ itv.it_value.tv_sec = 1;
+ if (setitimer(ITIMER_REAL, &itv, NULL) == -1)
+ goto skip_simplesignal;
+
+ event_dispatch();
+ skip_simplesignal:
+ if (signal_del(&ev) == -1)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+static void
+test_multiplesignal(void)
+{
+ struct event ev_one, ev_two;
+ struct itimerval itv;
+
+ setup_test("Multiple signal: ");
+
+ signal_set(&ev_one, SIGALRM, signal_cb, &ev_one);
+ signal_add(&ev_one, NULL);
+
+ signal_set(&ev_two, SIGALRM, signal_cb, &ev_two);
+ signal_add(&ev_two, NULL);
+
+ memset(&itv, 0, sizeof(itv));
+ itv.it_value.tv_sec = 1;
+ if (setitimer(ITIMER_REAL, &itv, NULL) == -1)
+ goto skip_simplesignal;
+
+ event_dispatch();
+
+ skip_simplesignal:
+ if (signal_del(&ev_one) == -1)
+ test_ok = 0;
+ if (signal_del(&ev_two) == -1)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+static void
+test_immediatesignal(void)
+{
+ struct event ev;
+
+ test_ok = 0;
+ printf("Immediate signal: ");
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ raise(SIGUSR1);
+ event_loop(EVLOOP_NONBLOCK);
+ signal_del(&ev);
+ cleanup_test();
+}
+
+static void
+test_signal_dealloc(void)
+{
+ /* make sure that signal_event is event_del'ed and pipe closed */
+ struct event ev;
+ struct event_base *base = event_init();
+ printf("Signal dealloc: ");
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ signal_del(&ev);
+ event_base_free(base);
+ /* If we got here without asserting, we're fine. */
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_signal_pipeloss(void)
+{
+ /* make sure that the base1 pipe is closed correctly. */
+ struct event_base *base1, *base2;
+ int pipe1;
+ test_ok = 0;
+ printf("Signal pipeloss: ");
+ base1 = event_init();
+ pipe1 = base1->sig.ev_signal_pair[0];
+ base2 = event_init();
+ event_base_free(base2);
+ event_base_free(base1);
+ if (close(pipe1) != -1 || errno!=EBADF) {
+ /* fd must be closed, so second close gives -1, EBADF */
+ printf("signal pipe not closed. ");
+ test_ok = 0;
+ } else {
+ test_ok = 1;
+ }
+ cleanup_test();
+}
+
+/*
+ * make two bases to catch signals, use both of them. this only works
+ * for event mechanisms that use our signal pipe trick. kqueue handles
+ * signals internally, and all interested kqueues get all the signals.
+ */
+static void
+test_signal_switchbase(void)
+{
+ struct event ev1, ev2;
+ struct event_base *base1, *base2;
+ int is_kqueue;
+ test_ok = 0;
+ printf("Signal switchbase: ");
+ base1 = event_init();
+ base2 = event_init();
+ is_kqueue = !strcmp(event_get_method(),"kqueue");
+ signal_set(&ev1, SIGUSR1, signal_cb, &ev1);
+ signal_set(&ev2, SIGUSR1, signal_cb, &ev2);
+ if (event_base_set(base1, &ev1) ||
+ event_base_set(base2, &ev2) ||
+ event_add(&ev1, NULL) ||
+ event_add(&ev2, NULL)) {
+ fprintf(stderr, "%s: cannot set base, add\n", __func__);
+ exit(1);
+ }
+
+ test_ok = 0;
+ /* can handle signal before loop is called */
+ raise(SIGUSR1);
+ event_base_loop(base2, EVLOOP_NONBLOCK);
+ if (is_kqueue) {
+ if (!test_ok)
+ goto done;
+ test_ok = 0;
+ }
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ if (test_ok && !is_kqueue) {
+ test_ok = 0;
+
+ /* set base1 to handle signals */
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ raise(SIGUSR1);
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ event_base_loop(base2, EVLOOP_NONBLOCK);
+ }
+ done:
+ event_base_free(base1);
+ event_base_free(base2);
+ cleanup_test();
+}
+
+/*
+ * assert that a signal event removed from the event queue really is
+ * removed - with no possibility of its parent handler being fired.
+ */
+static void
+test_signal_assert(void)
+{
+	struct event ev;
+	struct event_base *base = event_init();
+	test_ok = 0;
+	printf("Signal handler assert: ");
+	/* use SIGCONT so we don't kill ourselves when we signal to nowhere */
+	signal_set(&ev, SIGCONT, signal_cb, &ev);
+	signal_add(&ev, NULL);
+	/*
+	 * if signal_del() fails to reset the handler, it's current handler
+	 * will still point to evsignal_handler().
+	 */
+	signal_del(&ev);
+
+	raise(SIGCONT);
+	/* only way to verify we were in evsignal_handler():
+	 * evsignal_caught must stay 0 if the handler was truly removed */
+	if (base->sig.evsignal_caught)
+		test_ok = 0;
+	else
+		test_ok = 1;
+
+	event_base_free(base);
+	cleanup_test();
+	return;
+}
+
+/*
+ * assert that we restore our previous signal handler properly.
+ */
+static void
+test_signal_restore(void)
+{
+ struct event ev;
+ struct event_base *base = event_init();
+#ifdef HAVE_SIGACTION
+ struct sigaction sa;
+#endif
+
+ test_ok = 0;
+ printf("Signal handler restore: ");
+#ifdef HAVE_SIGACTION
+ sa.sa_handler = signal_cb_sa;
+ sa.sa_flags = 0x0;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL) == -1)
+ goto out;
+#else
+ if (signal(SIGUSR1, signal_cb_sa) == SIG_ERR)
+ goto out;
+#endif
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ signal_del(&ev);
+
+ raise(SIGUSR1);
+ /* 1 == signal_cb, 2 == signal_cb_sa, we want our previous handler */
+ if (test_ok != 2)
+ test_ok = 0;
+out:
+ event_base_free(base);
+ cleanup_test();
+ return;
+}
+
+static void
+signal_cb_swp(int sig, short event, void *arg)
+{
+ called++;
+ if (called < 5)
+ raise(sig);
+ else
+ event_loopexit(NULL);
+}
+static void
+timeout_cb_swp(int fd, short event, void *arg)
+{
+ if (called == -1) {
+ struct timeval tv = {5, 0};
+
+ called = 0;
+ evtimer_add((struct event *)arg, &tv);
+ raise(SIGUSR1);
+ return;
+ }
+ test_ok = 0;
+ event_loopexit(NULL);
+}
+
+static void
+test_signal_while_processing(void)
+{
+ struct event_base *base = event_init();
+ struct event ev, ev_timer;
+ struct timeval tv = {0, 0};
+
+ setup_test("Receiving a signal while processing other signal: ");
+
+ called = -1;
+ test_ok = 1;
+ signal_set(&ev, SIGUSR1, signal_cb_swp, NULL);
+ signal_add(&ev, NULL);
+ evtimer_set(&ev_timer, timeout_cb_swp, &ev_timer);
+ evtimer_add(&ev_timer, &tv);
+ event_dispatch();
+
+ event_base_free(base);
+ cleanup_test();
+ return;
+}
+#endif
+
+static void
+test_free_active_base(void)
+{
+ struct event_base *base1;
+ struct event ev1;
+ setup_test("Free active base: ");
+ base1 = event_init();
+ event_set(&ev1, pair[1], EV_READ, simple_read_cb, &ev1);
+ event_base_set(base1, &ev1);
+ event_add(&ev1, NULL);
+ /* event_del(&ev1); */
+ event_base_free(base1);
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_event_base_new(void)
+{
+ struct event_base *base;
+ struct event ev1;
+ setup_test("Event base new: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ base = event_base_new();
+ event_set(&ev1, pair[1], EV_READ, simple_read_cb, &ev1);
+ event_base_set(base, &ev1);
+ event_add(&ev1, NULL);
+
+ event_base_dispatch(base);
+
+ event_base_free(base);
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_loopexit(void)
+{
+ struct timeval tv, tv_start, tv_end;
+ struct event ev;
+
+ setup_test("Loop exit: ");
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 60*60*24;
+ evtimer_set(&ev, timeout_cb, NULL);
+ evtimer_add(&ev, &tv);
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 1;
+ event_loopexit(&tv);
+
+ evutil_gettimeofday(&tv_start, NULL);
+ event_dispatch();
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+
+ evtimer_del(&ev);
+
+ if (tv.tv_sec < 2)
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+test_loopexit_multiple(void)
+{
+ struct timeval tv;
+ struct event_base *base;
+
+ setup_test("Loop Multiple exit: ");
+
+ base = event_base_new();
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 1;
+ event_base_loopexit(base, &tv);
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 2;
+ event_base_loopexit(base, &tv);
+
+ event_base_dispatch(base);
+
+ event_base_free(base);
+
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+break_cb(int fd, short events, void *arg)
+{
+ test_ok = 1;
+ event_loopbreak();
+}
+
+static void
+fail_cb(int fd, short events, void *arg)
+{
+ test_ok = 0;
+}
+
+static void
+test_loopbreak(void)
+{
+ struct event ev1, ev2;
+ struct timeval tv;
+
+ setup_test("Loop break: ");
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ evtimer_set(&ev1, break_cb, NULL);
+ evtimer_add(&ev1, &tv);
+ evtimer_set(&ev2, fail_cb, NULL);
+ evtimer_add(&ev2, &tv);
+
+ event_dispatch();
+
+ evtimer_del(&ev1);
+ evtimer_del(&ev2);
+
+ cleanup_test();
+}
+
+static void
+test_evbuffer(void) {
+
+ struct evbuffer *evb = evbuffer_new();
+ setup_test("Testing Evbuffer: ");
+
+ evbuffer_add_printf(evb, "%s/%d", "hello", 1);
+
+ if (EVBUFFER_LENGTH(evb) == 7 &&
+ strcmp((char*)EVBUFFER_DATA(evb), "hello/1") == 0)
+ test_ok = 1;
+
+ evbuffer_free(evb);
+
+ cleanup_test();
+}
+
+static void
+test_evbuffer_find(void)
+{
+ u_char* p;
+ const char* test1 = "1234567890\r\n";
+ const char* test2 = "1234567890\r";
+#define EVBUFFER_INITIAL_LENGTH 256
+ char test3[EVBUFFER_INITIAL_LENGTH];
+ unsigned int i;
+ struct evbuffer * buf = evbuffer_new();
+
+ /* make sure evbuffer_find doesn't match past the end of the buffer */
+ fprintf(stdout, "Testing evbuffer_find 1: ");
+ evbuffer_add(buf, (u_char*)test1, strlen(test1));
+ evbuffer_drain(buf, strlen(test1));
+ evbuffer_add(buf, (u_char*)test2, strlen(test2));
+ p = evbuffer_find(buf, (u_char*)"\r\n", 2);
+ if (p == NULL) {
+ fprintf(stdout, "OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+ * drain the buffer and do another find; in r309 this would
+ * read past the allocated buffer causing a valgrind error.
+ */
+ fprintf(stdout, "Testing evbuffer_find 2: ");
+ evbuffer_drain(buf, strlen(test2));
+ for (i = 0; i < EVBUFFER_INITIAL_LENGTH; ++i)
+ test3[i] = 'a';
+ test3[EVBUFFER_INITIAL_LENGTH - 1] = 'x';
+ evbuffer_add(buf, (u_char *)test3, EVBUFFER_INITIAL_LENGTH);
+ p = evbuffer_find(buf, (u_char *)"xy", 2);
+ if (p == NULL) {
+ printf("OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* simple test for match at end of allocated buffer */
+ fprintf(stdout, "Testing evbuffer_find 3: ");
+ p = evbuffer_find(buf, (u_char *)"ax", 2);
+ if (p != NULL && strncmp((char*)p, "ax", 2) == 0) {
+ printf("OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ evbuffer_free(buf);
+}
+
+/*
+ * simple bufferevent test
+ */
+
+static void
+readcb(struct bufferevent *bev, void *arg)
+{
+ /* Disable further reads once the full 8333-byte payload written
+  * by test_bufferevent() has accumulated, and count one success. */
+ if (EVBUFFER_LENGTH(bev->input) == 8333) {
+ bufferevent_disable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+writecb(struct bufferevent *bev, void *arg)
+{
+ /* Output buffer fully flushed: count one success. */
+ if (EVBUFFER_LENGTH(bev->output) == 0)
+ test_ok++;
+}
+
+static void
+errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ /* Any bufferevent error fails the test with a distinctive value. */
+ test_ok = -2;
+}
+
+static void
+test_bufferevent(void)
+{
+ struct bufferevent *bev1, *bev2;
+ char buffer[8333];
+ int i;
+
+ setup_test("Bufferevent: ");
+
+ /* bev1 writes on pair[0]; bev2 reads on pair[1].  Both share the
+  * readcb/writecb/errorcb callbacks above. */
+ bev1 = bufferevent_new(pair[0], readcb, writecb, errorcb, NULL);
+ bev2 = bufferevent_new(pair[1], readcb, writecb, errorcb, NULL);
+
+ bufferevent_disable(bev1, EV_READ);
+ bufferevent_enable(bev2, EV_READ);
+
+ /* NOTE(review): i is int vs. size_t from sizeof — fine for 8333
+  * but triggers a signed/unsigned comparison warning. */
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = i;
+
+ bufferevent_write(bev1, buffer, sizeof(buffer));
+
+ event_dispatch();
+
+ bufferevent_free(bev1);
+ bufferevent_free(bev2);
+
+ /* Expect exactly two successes: write flushed + full read. */
+ if (test_ok != 2)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+/*
+ * test watermarks and bufferevent
+ */
+
+static void
+wm_readcb(struct bufferevent *bev, void *arg)
+{
+ int len = EVBUFFER_LENGTH(bev->input);
+ static int nread;
+
+ /* Watermarks are set to (low=10, high=20): each invocation must
+  * therefore see between 10 and 20 bytes pending. */
+ assert(len >= 10 && len <= 20);
+
+ evbuffer_drain(bev->input, len);
+
+ nread += len;
+ if (nread == 65000) {
+ bufferevent_disable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+wm_writecb(struct bufferevent *bev, void *arg)
+{
+ /* Output buffer fully flushed: count one success. */
+ if (EVBUFFER_LENGTH(bev->output) == 0)
+ test_ok++;
+}
+
+static void
+wm_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ /* Any bufferevent error fails the test with a distinctive value. */
+ test_ok = -2;
+}
+
+static void
+test_bufferevent_watermarks(void)
+{
+ struct bufferevent *bev1, *bev2;
+ char buffer[65000];
+ int i;
+
+ setup_test("Bufferevent Watermarks: ");
+
+ bev1 = bufferevent_new(pair[0], NULL, wm_writecb, wm_errorcb, NULL);
+ bev2 = bufferevent_new(pair[1], wm_readcb, NULL, wm_errorcb, NULL);
+
+ bufferevent_disable(bev1, EV_READ);
+ bufferevent_enable(bev2, EV_READ);
+
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = i;
+
+ bufferevent_write(bev1, buffer, sizeof(buffer));
+
+ /* limit the reading on the receiving bufferevent */
+ bufferevent_setwatermark(bev2, EV_READ, 10, 20);
+
+ event_dispatch();
+
+ bufferevent_free(bev1);
+ bufferevent_free(bev2);
+
+ /* Expect exactly two successes: writer drained + reader got 65000. */
+ if (test_ok != 2)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+struct test_pri_event {
+ struct event ev; /* timer that the callback keeps re-arming */
+ int count; /* number of times the callback has fired */
+};
+
+static void
+test_priorities_cb(int fd, short what, void *arg)
+{
+ struct test_pri_event *pri = arg;
+ struct timeval tv;
+
+ /* After three firings, request loop exit instead of re-arming. */
+ if (pri->count == 3) {
+ event_loopexit(NULL);
+ return;
+ }
+
+ pri->count++;
+
+ /* Re-arm with a zero timeout so the event fires again immediately. */
+ evutil_timerclear(&tv);
+ event_add(&pri->ev, &tv);
+}
+
+static void
+test_priorities(int npriorities)
+{
+ char buf[32];
+ struct test_pri_event one, two;
+ struct timeval tv;
+
+ evutil_snprintf(buf, sizeof(buf), "Testing Priorities %d: ", npriorities);
+ setup_test(buf);
+
+ event_base_priority_init(global_base, npriorities);
+
+ memset(&one, 0, sizeof(one));
+ memset(&two, 0, sizeof(two));
+
+ /* "one" runs at the highest priority (0), "two" at the lowest. */
+ timeout_set(&one.ev, test_priorities_cb, &one);
+ if (event_priority_set(&one.ev, 0) == -1) {
+ fprintf(stderr, "%s: failed to set priority", __func__);
+ exit(1);
+ }
+
+ timeout_set(&two.ev, test_priorities_cb, &two);
+ if (event_priority_set(&two.ev, npriorities - 1) == -1) {
+ fprintf(stderr, "%s: failed to set priority", __func__);
+ exit(1);
+ }
+
+ evutil_timerclear(&tv);
+
+ if (event_add(&one.ev, &tv) == -1)
+ exit(1);
+ if (event_add(&two.ev, &tv) == -1)
+ exit(1);
+
+ event_dispatch();
+
+ event_del(&one.ev);
+ event_del(&two.ev);
+
+ /* Expected counts depend on how many priority levels exist:
+  * with 1 level both run equally; with more, the high-priority
+  * event starves the low-priority one. */
+ if (npriorities == 1) {
+ if (one.count == 3 && two.count == 3)
+ test_ok = 1;
+ } else if (npriorities == 2) {
+ /* Two is called once because event_loopexit is priority 1 */
+ if (one.count == 3 && two.count == 1)
+ test_ok = 1;
+ } else {
+ if (one.count == 3 && two.count == 0)
+ test_ok = 1;
+ }
+
+ cleanup_test();
+}
+
+static void
+test_multiple_cb(int fd, short event, void *arg)
+{
+ /* Record which of the two events fired via distinct bits. */
+ if (event & EV_READ)
+ test_ok |= 1;
+ else if (event & EV_WRITE)
+ test_ok |= 2;
+}
+
+static void
+test_multiple_events_for_same_fd(void)
+{
+ struct event e1, e2;
+
+ setup_test("Multiple events for same fd: ");
+
+ /* Register both a read and a write event on the same fd. */
+ event_set(&e1, pair[0], EV_READ, test_multiple_cb, NULL);
+ event_add(&e1, NULL);
+ event_set(&e2, pair[0], EV_WRITE, test_multiple_cb, NULL);
+ event_add(&e2, NULL);
+ event_loop(EVLOOP_ONCE);
+ event_del(&e2);
+ /* NOTE(review): write() return value is unchecked. */
+ write(pair[1], TEST1, strlen(TEST1)+1);
+ event_loop(EVLOOP_ONCE);
+ event_del(&e1);
+
+ /* Both bits must be set: write fired first, then the read. */
+ if (test_ok != 3)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+int evtag_decode_int(uint32_t *pnumber, struct evbuffer *evbuf);
+int evtag_encode_tag(struct evbuffer *evbuf, uint32_t number);
+int evtag_decode_tag(uint32_t *pnumber, struct evbuffer *evbuf);
+
+static void
+read_once_cb(int fd, short event, void *arg)
+{
+ char buf[256];
+ int len;
+
+ len = read(fd, buf, sizeof(buf));
+
+ /* The event was added without EV_PERSIST, so a second invocation
+  * (tracked via the global 'called') means the test failed. */
+ if (called) {
+ test_ok = 0;
+ } else if (len) {
+ /* Assumes global pair[0] can be used for writing */
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ test_ok = 1;
+ }
+
+ called++;
+}
+
+static void
+test_want_only_once(void)
+{
+ struct event ev;
+ struct timeval tv;
+
+ /* Very simple read test */
+ setup_test("Want read only once: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ /* Setup the loop termination */
+ evutil_timerclear(&tv);
+ tv.tv_sec = 1;
+ event_loopexit(&tv);
+
+ /* Non-persistent read event: read_once_cb must fire exactly once
+  * even though it writes more data back onto the pair. */
+ event_set(&ev, pair[1], EV_READ, read_once_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+#define TEST_MAX_INT 6
+
+static void
+evtag_int_test(void)
+{
+ struct evbuffer *tmp = evbuffer_new();
+ uint32_t integers[TEST_MAX_INT] = {
+ 0xaf0, 0x1000, 0x1, 0xdeadbeef, 0x00, 0xbef000
+ };
+ uint32_t integer;
+ int i;
+
+ /* Round-trip: encode all sample integers into one buffer... */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ int oldlen, newlen;
+ oldlen = EVBUFFER_LENGTH(tmp);
+ encode_int(tmp, integers[i]);
+ newlen = EVBUFFER_LENGTH(tmp);
+ fprintf(stdout, "\t\tencoded 0x%08x with %d bytes\n",
+ integers[i], newlen - oldlen);
+ }
+
+ /* ...then decode them back in order and compare. */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ if (evtag_decode_int(&integer, tmp) == -1) {
+ fprintf(stderr, "decode %d failed", i);
+ exit(1);
+ }
+ if (integer != integers[i]) {
+ fprintf(stderr, "got %x, wanted %x",
+ integer, integers[i]);
+ exit(1);
+ }
+ }
+
+ /* The buffer must be fully consumed after decoding. */
+ if (EVBUFFER_LENGTH(tmp) != 0) {
+ fprintf(stderr, "trailing data");
+ exit(1);
+ }
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_fuzz(void)
+{
+ u_char buffer[4096];
+ struct evbuffer *tmp = evbuffer_new();
+ struct timeval tv;
+ int i, j;
+
+ /* Feed 100 random 4 KB blobs to the unmarshaller; nearly all of
+  * them should be rejected.  NOTE(review): rand() is never seeded,
+  * so the "random" input is the same on every run. */
+ int not_failed = 0;
+ for (j = 0; j < 100; j++) {
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = rand();
+ evbuffer_drain(tmp, -1);
+ evbuffer_add(tmp, buffer, sizeof(buffer));
+
+ if (evtag_unmarshal_timeval(tmp, 0, &tv) != -1)
+ not_failed++;
+ }
+
+ /* The majority of decodes should fail */
+ if (not_failed >= 10) {
+ fprintf(stderr, "evtag_unmarshal should have failed");
+ exit(1);
+ }
+
+ /* Now insert some corruption into the tag length field */
+ evbuffer_drain(tmp, -1);
+ evutil_timerclear(&tv);
+ tv.tv_sec = 1;
+ evtag_marshal_timeval(tmp, 0, &tv);
+ evbuffer_add(tmp, buffer, sizeof(buffer));
+
+ /* Clobber byte 1 (the length field) of an otherwise valid record. */
+ EVBUFFER_DATA(tmp)[1] = 0xff;
+ if (evtag_unmarshal_timeval(tmp, 0, &tv) != -1) {
+ fprintf(stderr, "evtag_unmarshal_timeval should have failed");
+ exit(1);
+ }
+
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_tag_encoding(void)
+{
+ struct evbuffer *tmp = evbuffer_new();
+ uint32_t integers[TEST_MAX_INT] = {
+ 0xaf0, 0x1000, 0x1, 0xdeadbeef, 0x00, 0xbef000
+ };
+ uint32_t integer;
+ int i;
+
+ /* Same round-trip scheme as evtag_int_test(), but exercising the
+  * tag-number encoding (evtag_encode_tag/evtag_decode_tag). */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ int oldlen, newlen;
+ oldlen = EVBUFFER_LENGTH(tmp);
+ evtag_encode_tag(tmp, integers[i]);
+ newlen = EVBUFFER_LENGTH(tmp);
+ fprintf(stdout, "\t\tencoded 0x%08x with %d bytes\n",
+ integers[i], newlen - oldlen);
+ }
+
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ if (evtag_decode_tag(&integer, tmp) == -1) {
+ fprintf(stderr, "decode %d failed", i);
+ exit(1);
+ }
+ if (integer != integers[i]) {
+ fprintf(stderr, "got %x, wanted %x",
+ integer, integers[i]);
+ exit(1);
+ }
+ }
+
+ /* The buffer must be fully consumed after decoding. */
+ if (EVBUFFER_LENGTH(tmp) != 0) {
+ fprintf(stderr, "trailing data");
+ exit(1);
+ }
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_test(void)
+{
+ /* Driver for the tag-encoding test group; each sub-test exits
+  * the process on failure, so reaching "OK" implies success. */
+ fprintf(stdout, "Testing Tagging:\n");
+
+ evtag_init();
+ evtag_int_test();
+ evtag_fuzz();
+
+ evtag_tag_encoding();
+
+ fprintf(stdout, "OK\n");
+}
+
+#ifndef WIN32
+static void
+rpc_test(void)
+{
+ struct msg *msg, *msg2;
+ struct kill *attack;
+ struct run *run;
+ struct evbuffer *tmp = evbuffer_new();
+ struct timeval tv_start, tv_end;
+ uint32_t tag;
+ int i;
+
+ fprintf(stdout, "Testing RPC: ");
+
+ /* Build a msg with required string fields, a nested kill message,
+  * and 1000 run sub-messages (generated API from regress.gen.h). */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "phoenix");
+
+ if (EVTAG_GET(msg, attack, &attack) == -1) {
+ fprintf(stderr, "Failed to set kill message.\n");
+ exit(1);
+ }
+
+ EVTAG_ASSIGN(attack, weapon, "feather");
+ EVTAG_ASSIGN(attack, action, "tickle");
+
+ evutil_gettimeofday(&tv_start, NULL);
+ for (i = 0; i < 1000; ++i) {
+ run = EVTAG_ADD(msg, run);
+ if (run == NULL) {
+ fprintf(stderr, "Failed to add run message.\n");
+ exit(1);
+ }
+ EVTAG_ASSIGN(run, how, "very fast but with some data in it");
+ EVTAG_ASSIGN(run, fixed_bytes,
+ (unsigned char*)"012345678901234567890123");
+ }
+
+ if (msg_complete(msg) == -1) {
+ fprintf(stderr, "Failed to make complete message.\n");
+ exit(1);
+ }
+
+ /* Marshal under tag 0xdeaf, then verify the tag can be peeked. */
+ evtag_marshal_msg(tmp, 0xdeaf, msg);
+
+ if (evtag_peek(tmp, &tag) == -1) {
+ fprintf(stderr, "Failed to peak tag.\n");
+ exit (1);
+ }
+
+ if (tag != 0xdeaf) {
+ fprintf(stderr, "Got incorrect tag: %0x.\n", tag);
+ exit (1);
+ }
+
+ /* Unmarshal into a fresh msg and check the round trip. */
+ msg2 = msg_new();
+ if (evtag_unmarshal_msg(tmp, 0xdeaf, msg2) == -1) {
+ fprintf(stderr, "Failed to unmarshal message.\n");
+ exit(1);
+ }
+
+ /* i is still 1000 here (loop bound), used for the per-add timing. */
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+ fprintf(stderr, "(%.1f us/add) ",
+ (float)tv_end.tv_sec/(float)i * 1000000.0 +
+ tv_end.tv_usec / (float)i);
+
+ if (!EVTAG_HAS(msg2, from_name) ||
+ !EVTAG_HAS(msg2, to_name) ||
+ !EVTAG_HAS(msg2, attack)) {
+ fprintf(stderr, "Missing data structures.\n");
+ exit(1);
+ }
+
+ if (EVTAG_LEN(msg2, run) != i) {
+ fprintf(stderr, "Wrong number of run messages.\n");
+ exit(1);
+ }
+
+ msg_free(msg);
+ msg_free(msg2);
+
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "OK\n");
+}
+#endif
+
+/*
+ * Unit test for evutil_strtoll(): 64-bit decimal parsing beyond the
+ * 32-bit range, negative values, endptr positioning after leading
+ * whitespace, and rejection of non-numeric input.
+ * (Fixes the misspelled test banner "evutil_stroll".)
+ */
+static void
+test_evutil_strtoll(void)
+{
+ const char *s;
+ char *endptr;
+ setup_test("evutil_strtoll: ");
+ test_ok = 0;
+
+ if (evutil_strtoll("5000000000", NULL, 10) != ((ev_int64_t)5000000)*1000)
+ goto err;
+ if (evutil_strtoll("-5000000000", NULL, 10) != ((ev_int64_t)5000000)*-1000)
+ goto err;
+ s = " 99999stuff";
+ if (evutil_strtoll(s, &endptr, 10) != (ev_int64_t)99999)
+ goto err;
+ if (endptr != s+6)
+ goto err;
+ if (evutil_strtoll("foo", NULL, 10) != 0)
+ goto err;
+
+ test_ok = 1;
+ err:
+ cleanup_test();
+}
+
+
+int
+main (int argc, char **argv)
+{
+#ifdef WIN32
+ WORD wVersionRequested;
+ WSADATA wsaData;
+ int err;
+
+ wVersionRequested = MAKEWORD( 2, 2 );
+
+ /* NOTE(review): err is never checked after WSAStartup. */
+ err = WSAStartup( wVersionRequested, &wsaData );
+#endif
+
+#ifndef WIN32
+ /* Ignore SIGPIPE so writes to closed sockets fail with EPIPE
+  * instead of killing the test process. */
+ if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
+ return (1);
+#endif
+ /* Unbuffered stdout so progress lines appear even on a crash. */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /* Initialize the event library */
+ global_base = event_init();
+
+ test_registerfds();
+
+ test_evutil_strtoll();
+
+ /* use the global event base and need to be called first */
+ test_priorities(1);
+ test_priorities(2);
+ test_priorities(3);
+
+ test_evbuffer();
+ test_evbuffer_find();
+
+ test_bufferevent();
+ test_bufferevent_watermarks();
+
+ test_free_active_base();
+
+ test_event_base_new();
+
+ http_suite();
+
+#ifndef WIN32
+ rpc_suite();
+#endif
+
+ dns_suite();
+
+#ifndef WIN32
+ test_fork();
+#endif
+
+ test_simpleread();
+
+ test_simplewrite();
+
+ test_multiple();
+
+ test_persistent();
+
+ test_combined();
+
+ test_simpletimeout();
+#ifndef WIN32
+ test_simplesignal();
+ test_multiplesignal();
+ test_immediatesignal();
+#endif
+ test_loopexit();
+ test_loopbreak();
+
+ test_loopexit_multiple();
+
+ test_multiple_events_for_same_fd();
+
+ test_want_only_once();
+
+ evtag_test();
+
+#ifndef WIN32
+ rpc_test();
+
+ test_signal_dealloc();
+ test_signal_pipeloss();
+ test_signal_switchbase();
+ test_signal_restore();
+ test_signal_assert();
+ test_signal_while_processing();
+#endif
+
+ return (0);
+}
+
diff --git a/libevent/test/regress.gen.c b/libevent/test/regress.gen.c
new file mode 100644
index 00000000000..ff31096a7c2
--- /dev/null
+++ b/libevent/test/regress.gen.c
@@ -0,0 +1,872 @@
+/*
+ * Automatically generated from ./regress.rpc
+ * by event_rpcgen.py/0.1. DO NOT EDIT THIS FILE.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <event.h>
+
+
+#include "./regress.gen.h"
+
+void event_err(int eval, const char *fmt, ...);
+void event_warn(const char *fmt, ...);
+void event_errx(int eval, const char *fmt, ...);
+void event_warnx(const char *fmt, ...);
+
+
+/*
+ * Implementation of msg
+ */
+
+static struct msg_access_ __msg_base = {
+ msg_from_name_assign,
+ msg_from_name_get,
+ msg_to_name_assign,
+ msg_to_name_get,
+ msg_attack_assign,
+ msg_attack_get,
+ msg_run_assign,
+ msg_run_get,
+ msg_run_add,
+};
+
+struct msg *
+msg_new(void)
+{
+ struct msg *tmp;
+ if ((tmp = malloc(sizeof(struct msg))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__msg_base;
+
+ tmp->from_name_data = NULL;
+ tmp->from_name_set = 0;
+
+ tmp->to_name_data = NULL;
+ tmp->to_name_set = 0;
+
+ tmp->attack_data = NULL;
+ tmp->attack_set = 0;
+
+ tmp->run_data = NULL;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ tmp->run_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+struct run *
+msg_run_add(struct msg *msg)
+{
+ if (++msg->run_length >= msg->run_num_allocated) {
+ int tobe_allocated = msg->run_num_allocated;
+ struct run ** new_data = NULL;
+ tobe_allocated = !tobe_allocated ? 1 : tobe_allocated << 1;
+ new_data = (struct run **) realloc(msg->run_data,
+ tobe_allocated * sizeof(struct run *));
+ if (new_data == NULL)
+ goto error;
+ msg->run_data = new_data;
+ msg->run_num_allocated = tobe_allocated;
+ }
+ msg->run_data[msg->run_length - 1] = run_new();
+ if (msg->run_data[msg->run_length - 1] == NULL)
+ goto error;
+ msg->run_set = 1;
+ return (msg->run_data[msg->run_length - 1]);
+error:
+ --msg->run_length;
+ return (NULL);
+}
+
+
+int
+msg_from_name_assign(struct msg *msg,
+ const char * value)
+{
+ if (msg->from_name_data != NULL)
+ free(msg->from_name_data);
+ if ((msg->from_name_data = strdup(value)) == NULL)
+ return (-1);
+ msg->from_name_set = 1;
+ return (0);
+}
+
+int
+msg_to_name_assign(struct msg *msg,
+ const char * value)
+{
+ if (msg->to_name_data != NULL)
+ free(msg->to_name_data);
+ if ((msg->to_name_data = strdup(value)) == NULL)
+ return (-1);
+ msg->to_name_set = 1;
+ return (0);
+}
+
+int
+msg_attack_assign(struct msg *msg,
+ const struct kill* value)
+{
+ struct evbuffer *tmp = NULL;
+ if (msg->attack_set) {
+ kill_clear(msg->attack_data);
+ msg->attack_set = 0;
+ } else {
+ msg->attack_data = kill_new();
+ if (msg->attack_data == NULL) {
+ event_warn("%s: kill_new()", __func__);
+ goto error;
+ }
+ }
+ if ((tmp = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new()", __func__);
+ goto error;
+ }
+ kill_marshal(tmp, value);
+ if (kill_unmarshal(msg->attack_data, tmp) == -1) {
+ event_warnx("%s: kill_unmarshal", __func__);
+ goto error;
+ }
+ msg->attack_set = 1;
+ evbuffer_free(tmp);
+ return (0);
+ error:
+ if (tmp != NULL)
+ evbuffer_free(tmp);
+ if (msg->attack_data != NULL) {
+ kill_free(msg->attack_data);
+ msg->attack_data = NULL;
+ }
+ return (-1);
+}
+
+int
+msg_run_assign(struct msg *msg, int off,
+ const struct run * value)
+{
+ struct evbuffer *tmp = NULL;
+ if (!msg->run_set || off < 0 || off >= msg->run_length)
+ return (-1);
+ run_clear(msg->run_data[off]);
+ if ((tmp = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new()", __func__);
+ goto error;
+ }
+ run_marshal(tmp, value);
+ if (run_unmarshal(msg->run_data[off], tmp) == -1) {
+ event_warnx("%s: run_unmarshal", __func__);
+ goto error;
+ }
+ evbuffer_free(tmp);
+ return (0);
+error:
+ if (tmp != NULL)
+ evbuffer_free(tmp);
+ run_clear(msg->run_data[off]);
+ return (-1);
+}
+
+int
+msg_from_name_get(struct msg *msg, char * *value)
+{
+ if (msg->from_name_set != 1)
+ return (-1);
+ *value = msg->from_name_data;
+ return (0);
+}
+
+int
+msg_to_name_get(struct msg *msg, char * *value)
+{
+ if (msg->to_name_set != 1)
+ return (-1);
+ *value = msg->to_name_data;
+ return (0);
+}
+
+int
+msg_attack_get(struct msg *msg, struct kill* *value)
+{
+ if (msg->attack_set != 1) {
+ msg->attack_data = kill_new();
+ if (msg->attack_data == NULL)
+ return (-1);
+ msg->attack_set = 1;
+ }
+ *value = msg->attack_data;
+ return (0);
+}
+
+int
+msg_run_get(struct msg *msg, int offset,
+ struct run * *value)
+{
+ if (!msg->run_set || offset < 0 || offset >= msg->run_length)
+ return (-1);
+ *value = msg->run_data[offset];
+ return (0);
+}
+
+void
+msg_clear(struct msg *tmp)
+{
+ if (tmp->from_name_set == 1) {
+ free (tmp->from_name_data);
+ tmp->from_name_data = NULL;
+ tmp->from_name_set = 0;
+ }
+ if (tmp->to_name_set == 1) {
+ free (tmp->to_name_data);
+ tmp->to_name_data = NULL;
+ tmp->to_name_set = 0;
+ }
+ if (tmp->attack_set == 1) {
+ kill_free(tmp->attack_data);
+ tmp->attack_data = NULL;
+ tmp->attack_set = 0;
+ }
+ if (tmp->run_set == 1) {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ run_free(tmp->run_data[i]);
+ }
+ free(tmp->run_data);
+ tmp->run_data = NULL;
+ tmp->run_set = 0;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ }
+}
+
+void
+msg_free(struct msg *tmp)
+{
+ if (tmp->from_name_data != NULL)
+ free (tmp->from_name_data);
+ if (tmp->to_name_data != NULL)
+ free (tmp->to_name_data);
+ if (tmp->attack_data != NULL)
+ kill_free(tmp->attack_data);
+ if (tmp->run_data != NULL) {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ run_free(tmp->run_data[i]);
+ tmp->run_data[i] = NULL;
+ }
+ free(tmp->run_data);
+ tmp->run_data = NULL;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ }
+ free(tmp);
+}
+
+void
+msg_marshal(struct evbuffer *evbuf, const struct msg *tmp){
+ evtag_marshal_string(evbuf, MSG_FROM_NAME, tmp->from_name_data);
+ evtag_marshal_string(evbuf, MSG_TO_NAME, tmp->to_name_data);
+ if (tmp->attack_set) {
+ evtag_marshal_kill(evbuf, MSG_ATTACK, tmp->attack_data);
+ }
+ {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ evtag_marshal_run(evbuf, MSG_RUN, tmp->run_data[i]);
+ }
+ }
+}
+
+int
+msg_unmarshal(struct msg *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case MSG_FROM_NAME:
+
+ if (tmp->from_name_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, MSG_FROM_NAME, &tmp->from_name_data) == -1) {
+ event_warnx("%s: failed to unmarshal from_name", __func__);
+ return (-1);
+ }
+ tmp->from_name_set = 1;
+ break;
+
+ case MSG_TO_NAME:
+
+ if (tmp->to_name_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, MSG_TO_NAME, &tmp->to_name_data) == -1) {
+ event_warnx("%s: failed to unmarshal to_name", __func__);
+ return (-1);
+ }
+ tmp->to_name_set = 1;
+ break;
+
+ case MSG_ATTACK:
+
+ if (tmp->attack_set)
+ return (-1);
+ tmp->attack_data = kill_new();
+ if (tmp->attack_data == NULL)
+ return (-1);
+ if (evtag_unmarshal_kill(evbuf, MSG_ATTACK, tmp->attack_data) == -1) {
+ event_warnx("%s: failed to unmarshal attack", __func__);
+ return (-1);
+ }
+ tmp->attack_set = 1;
+ break;
+
+ case MSG_RUN:
+
+ if (msg_run_add(tmp) == NULL)
+ return (-1);
+ if (evtag_unmarshal_run(evbuf, MSG_RUN,
+ tmp->run_data[tmp->run_length - 1]) == -1) {
+ --tmp->run_length;
+ event_warnx("%s: failed to unmarshal run", __func__);
+ return (-1);
+ }
+ tmp->run_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (msg_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+msg_complete(struct msg *msg)
+{
+ if (!msg->from_name_set)
+ return (-1);
+ if (!msg->to_name_set)
+ return (-1);
+ if (msg->attack_set && kill_complete(msg->attack_data) == -1)
+ return (-1);
+ {
+ int i;
+ for (i = 0; i < msg->run_length; ++i) {
+ if (run_complete(msg->run_data[i]) == -1)
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+int
+evtag_unmarshal_msg(struct evbuffer *evbuf, uint32_t need_tag, struct msg *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (msg_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_msg(struct evbuffer *evbuf, uint32_t tag, const struct msg *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ msg_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
+/*
+ * Implementation of kill
+ */
+
+static struct kill_access_ __kill_base = {
+ kill_weapon_assign,
+ kill_weapon_get,
+ kill_action_assign,
+ kill_action_get,
+ kill_how_often_assign,
+ kill_how_often_get,
+};
+
+struct kill *
+kill_new(void)
+{
+ struct kill *tmp;
+ if ((tmp = malloc(sizeof(struct kill))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__kill_base;
+
+ tmp->weapon_data = NULL;
+ tmp->weapon_set = 0;
+
+ tmp->action_data = NULL;
+ tmp->action_set = 0;
+
+ tmp->how_often_data = 0;
+ tmp->how_often_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+int
+kill_weapon_assign(struct kill *msg,
+ const char * value)
+{
+ if (msg->weapon_data != NULL)
+ free(msg->weapon_data);
+ if ((msg->weapon_data = strdup(value)) == NULL)
+ return (-1);
+ msg->weapon_set = 1;
+ return (0);
+}
+
+int
+kill_action_assign(struct kill *msg,
+ const char * value)
+{
+ if (msg->action_data != NULL)
+ free(msg->action_data);
+ if ((msg->action_data = strdup(value)) == NULL)
+ return (-1);
+ msg->action_set = 1;
+ return (0);
+}
+
+int
+kill_how_often_assign(struct kill *msg, const uint32_t value)
+{
+ msg->how_often_set = 1;
+ msg->how_often_data = value;
+ return (0);
+}
+
+int
+kill_weapon_get(struct kill *msg, char * *value)
+{
+ if (msg->weapon_set != 1)
+ return (-1);
+ *value = msg->weapon_data;
+ return (0);
+}
+
+int
+kill_action_get(struct kill *msg, char * *value)
+{
+ if (msg->action_set != 1)
+ return (-1);
+ *value = msg->action_data;
+ return (0);
+}
+
+int
+kill_how_often_get(struct kill *msg, uint32_t *value)
+{
+ if (msg->how_often_set != 1)
+ return (-1);
+ *value = msg->how_often_data;
+ return (0);
+}
+
+void
+kill_clear(struct kill *tmp)
+{
+ if (tmp->weapon_set == 1) {
+ free (tmp->weapon_data);
+ tmp->weapon_data = NULL;
+ tmp->weapon_set = 0;
+ }
+ if (tmp->action_set == 1) {
+ free (tmp->action_data);
+ tmp->action_data = NULL;
+ tmp->action_set = 0;
+ }
+ tmp->how_often_set = 0;
+}
+
+void
+kill_free(struct kill *tmp)
+{
+ if (tmp->weapon_data != NULL)
+ free (tmp->weapon_data);
+ if (tmp->action_data != NULL)
+ free (tmp->action_data);
+ free(tmp);
+}
+
+void
+kill_marshal(struct evbuffer *evbuf, const struct kill *tmp){
+ evtag_marshal_string(evbuf, KILL_WEAPON, tmp->weapon_data);
+ evtag_marshal_string(evbuf, KILL_ACTION, tmp->action_data);
+ if (tmp->how_often_set) {
+ evtag_marshal_int(evbuf, KILL_HOW_OFTEN, tmp->how_often_data);
+ }
+}
+
+int
+kill_unmarshal(struct kill *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case KILL_WEAPON:
+
+ if (tmp->weapon_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, KILL_WEAPON, &tmp->weapon_data) == -1) {
+ event_warnx("%s: failed to unmarshal weapon", __func__);
+ return (-1);
+ }
+ tmp->weapon_set = 1;
+ break;
+
+ case KILL_ACTION:
+
+ if (tmp->action_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, KILL_ACTION, &tmp->action_data) == -1) {
+ event_warnx("%s: failed to unmarshal action", __func__);
+ return (-1);
+ }
+ tmp->action_set = 1;
+ break;
+
+ case KILL_HOW_OFTEN:
+
+ if (tmp->how_often_set)
+ return (-1);
+ if (evtag_unmarshal_int(evbuf, KILL_HOW_OFTEN, &tmp->how_often_data) == -1) {
+ event_warnx("%s: failed to unmarshal how_often", __func__);
+ return (-1);
+ }
+ tmp->how_often_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (kill_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+kill_complete(struct kill *msg)
+{
+ if (!msg->weapon_set)
+ return (-1);
+ if (!msg->action_set)
+ return (-1);
+ return (0);
+}
+
+int
+evtag_unmarshal_kill(struct evbuffer *evbuf, uint32_t need_tag, struct kill *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (kill_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_kill(struct evbuffer *evbuf, uint32_t tag, const struct kill *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ kill_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
+/*
+ * Implementation of run
+ */
+
+static struct run_access_ __run_base = {
+ run_how_assign,
+ run_how_get,
+ run_some_bytes_assign,
+ run_some_bytes_get,
+ run_fixed_bytes_assign,
+ run_fixed_bytes_get,
+};
+
+struct run *
+run_new(void)
+{
+ struct run *tmp;
+ if ((tmp = malloc(sizeof(struct run))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__run_base;
+
+ tmp->how_data = NULL;
+ tmp->how_set = 0;
+
+ tmp->some_bytes_data = NULL;
+ tmp->some_bytes_length = 0;
+ tmp->some_bytes_set = 0;
+
+ memset(tmp->fixed_bytes_data, 0, sizeof(tmp->fixed_bytes_data));
+ tmp->fixed_bytes_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+int
+run_how_assign(struct run *msg,
+ const char * value)
+{
+ if (msg->how_data != NULL)
+ free(msg->how_data);
+ if ((msg->how_data = strdup(value)) == NULL)
+ return (-1);
+ msg->how_set = 1;
+ return (0);
+}
+
+int
+run_some_bytes_assign(struct run *msg, const uint8_t * value, uint32_t len)
+{
+ if (msg->some_bytes_data != NULL)
+ free (msg->some_bytes_data);
+ msg->some_bytes_data = malloc(len);
+ if (msg->some_bytes_data == NULL)
+ return (-1);
+ msg->some_bytes_set = 1;
+ msg->some_bytes_length = len;
+ memcpy(msg->some_bytes_data, value, len);
+ return (0);
+}
+
+int
+run_fixed_bytes_assign(struct run *msg, const uint8_t *value)
+{
+ msg->fixed_bytes_set = 1;
+ memcpy(msg->fixed_bytes_data, value, 24);
+ return (0);
+}
+
+int
+run_how_get(struct run *msg, char * *value)
+{
+ if (msg->how_set != 1)
+ return (-1);
+ *value = msg->how_data;
+ return (0);
+}
+
+int
+run_some_bytes_get(struct run *msg, uint8_t * *value, uint32_t *plen)
+{
+ if (msg->some_bytes_set != 1)
+ return (-1);
+ *value = msg->some_bytes_data;
+ *plen = msg->some_bytes_length;
+ return (0);
+}
+
+int
+run_fixed_bytes_get(struct run *msg, uint8_t **value)
+{
+ if (msg->fixed_bytes_set != 1)
+ return (-1);
+ *value = msg->fixed_bytes_data;
+ return (0);
+}
+
+void
+run_clear(struct run *tmp)
+{
+ if (tmp->how_set == 1) {
+ free (tmp->how_data);
+ tmp->how_data = NULL;
+ tmp->how_set = 0;
+ }
+ if (tmp->some_bytes_set == 1) {
+ free (tmp->some_bytes_data);
+ tmp->some_bytes_data = NULL;
+ tmp->some_bytes_length = 0;
+ tmp->some_bytes_set = 0;
+ }
+ tmp->fixed_bytes_set = 0;
+ memset(tmp->fixed_bytes_data, 0, sizeof(tmp->fixed_bytes_data));
+}
+
+void
+run_free(struct run *tmp)
+{
+ if (tmp->how_data != NULL)
+ free (tmp->how_data);
+ if (tmp->some_bytes_data != NULL)
+ free (tmp->some_bytes_data);
+ free(tmp);
+}
+
+void
+run_marshal(struct evbuffer *evbuf, const struct run *tmp){
+ evtag_marshal_string(evbuf, RUN_HOW, tmp->how_data);
+ if (tmp->some_bytes_set) {
+ evtag_marshal(evbuf, RUN_SOME_BYTES, tmp->some_bytes_data, tmp->some_bytes_length);
+ }
+ evtag_marshal(evbuf, RUN_FIXED_BYTES, tmp->fixed_bytes_data, sizeof(tmp->fixed_bytes_data));
+}
+
+int
+run_unmarshal(struct run *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case RUN_HOW:
+
+ if (tmp->how_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, RUN_HOW, &tmp->how_data) == -1) {
+ event_warnx("%s: failed to unmarshal how", __func__);
+ return (-1);
+ }
+ tmp->how_set = 1;
+ break;
+
+ case RUN_SOME_BYTES:
+
+ if (tmp->some_bytes_set)
+ return (-1);
+ if (evtag_payload_length(evbuf, &tmp->some_bytes_length) == -1)
+ return (-1);
+ if (tmp->some_bytes_length > EVBUFFER_LENGTH(evbuf))
+ return (-1);
+ if ((tmp->some_bytes_data = malloc(tmp->some_bytes_length)) == NULL)
+ return (-1);
+ if (evtag_unmarshal_fixed(evbuf, RUN_SOME_BYTES, tmp->some_bytes_data, tmp->some_bytes_length) == -1) {
+ event_warnx("%s: failed to unmarshal some_bytes", __func__);
+ return (-1);
+ }
+ tmp->some_bytes_set = 1;
+ break;
+
+ case RUN_FIXED_BYTES:
+
+ if (tmp->fixed_bytes_set)
+ return (-1);
+ if (evtag_unmarshal_fixed(evbuf, RUN_FIXED_BYTES, tmp->fixed_bytes_data, sizeof(tmp->fixed_bytes_data)) == -1) {
+ event_warnx("%s: failed to unmarshal fixed_bytes", __func__);
+ return (-1);
+ }
+ tmp->fixed_bytes_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (run_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+run_complete(struct run *msg)
+{
+ if (!msg->how_set)
+ return (-1);
+ if (!msg->fixed_bytes_set)
+ return (-1);
+ return (0);
+}
+
+int
+evtag_unmarshal_run(struct evbuffer *evbuf, uint32_t need_tag, struct run *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (run_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_run(struct evbuffer *evbuf, uint32_t tag, const struct run *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ run_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
diff --git a/libevent/test/regress.gen.h b/libevent/test/regress.gen.h
new file mode 100644
index 00000000000..09591f0584b
--- /dev/null
+++ b/libevent/test/regress.gen.h
@@ -0,0 +1,183 @@
+/*
+ * Automatically generated from ./regress.rpc
+ */
+/* NOTE(review): generated file -- do not hand-edit; change regress.rpc
+ * and regenerate.  The guard name ___REGRESS_RPC_ uses a reserved
+ * identifier prefix; fixing that belongs in the generator. */
+
+#ifndef ___REGRESS_RPC_
+#define ___REGRESS_RPC_
+
+#include <event-config.h>
+#ifdef _EVENT_HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#define EVTAG_HAS(msg, member) ((msg)->member##_set == 1)
+#ifdef __GNUC__
+#define EVTAG_ASSIGN(msg, member, args...) (*(msg)->base->member##_assign)(msg, ## args)
+#define EVTAG_GET(msg, member, args...) (*(msg)->base->member##_get)(msg, ## args)
+#else
+#define EVTAG_ASSIGN(msg, member, ...) (*(msg)->base->member##_assign)(msg, ## __VA_ARGS__)
+#define EVTAG_GET(msg, member, ...) (*(msg)->base->member##_get)(msg, ## __VA_ARGS__)
+#endif
+#define EVTAG_ADD(msg, member) (*(msg)->base->member##_add)(msg)
+#define EVTAG_LEN(msg, member) ((msg)->member##_length)
+
+struct msg;
+struct kill;
+struct run;
+
+/* Tag definition for msg */
+enum msg_ {
+  MSG_FROM_NAME=1,
+  MSG_TO_NAME=2,
+  MSG_ATTACK=3,
+  MSG_RUN=4,
+  MSG_MAX_TAGS
+};
+
+/* Structure declaration for msg */
+struct msg_access_ {
+  int (*from_name_assign)(struct msg *, const char *);
+  int (*from_name_get)(struct msg *, char * *);
+  int (*to_name_assign)(struct msg *, const char *);
+  int (*to_name_get)(struct msg *, char * *);
+  int (*attack_assign)(struct msg *, const struct kill*);
+  int (*attack_get)(struct msg *, struct kill* *);
+  int (*run_assign)(struct msg *, int, const struct run *);
+  int (*run_get)(struct msg *, int, struct run * *);
+  struct run * (*run_add)(struct msg *);
+};
+
+struct msg {
+  struct msg_access_ *base;
+
+  char *from_name_data;
+  char *to_name_data;
+  struct kill* attack_data;
+  struct run **run_data;
+  int run_length;
+  int run_num_allocated;
+
+  uint8_t from_name_set;
+  uint8_t to_name_set;
+  uint8_t attack_set;
+  uint8_t run_set;
+};
+
+struct msg *msg_new(void);
+void msg_free(struct msg *);
+void msg_clear(struct msg *);
+void msg_marshal(struct evbuffer *, const struct msg *);
+int msg_unmarshal(struct msg *, struct evbuffer *);
+int msg_complete(struct msg *);
+void evtag_marshal_msg(struct evbuffer *, uint32_t,
+    const struct msg *);
+int evtag_unmarshal_msg(struct evbuffer *, uint32_t,
+    struct msg *);
+int msg_from_name_assign(struct msg *, const char *);
+int msg_from_name_get(struct msg *, char * *);
+int msg_to_name_assign(struct msg *, const char *);
+int msg_to_name_get(struct msg *, char * *);
+int msg_attack_assign(struct msg *, const struct kill*);
+int msg_attack_get(struct msg *, struct kill* *);
+int msg_run_assign(struct msg *, int, const struct run *);
+int msg_run_get(struct msg *, int, struct run * *);
+struct run * msg_run_add(struct msg *);
+/* --- msg done --- */
+
+/* Tag definition for kill */
+enum kill_ {
+  KILL_WEAPON=65825, /* 0x10121 in regress.rpc: exercises multi-byte tag encoding */
+  KILL_ACTION=2,
+  KILL_HOW_OFTEN=3,
+  KILL_MAX_TAGS
+};
+
+/* Structure declaration for kill */
+struct kill_access_ {
+  int (*weapon_assign)(struct kill *, const char *);
+  int (*weapon_get)(struct kill *, char * *);
+  int (*action_assign)(struct kill *, const char *);
+  int (*action_get)(struct kill *, char * *);
+  int (*how_often_assign)(struct kill *, const uint32_t);
+  int (*how_often_get)(struct kill *, uint32_t *);
+};
+
+struct kill {
+  struct kill_access_ *base;
+
+  char *weapon_data;
+  char *action_data;
+  uint32_t how_often_data;
+
+  uint8_t weapon_set;
+  uint8_t action_set;
+  uint8_t how_often_set;
+};
+
+struct kill *kill_new(void);
+void kill_free(struct kill *);
+void kill_clear(struct kill *);
+void kill_marshal(struct evbuffer *, const struct kill *);
+int kill_unmarshal(struct kill *, struct evbuffer *);
+int kill_complete(struct kill *);
+void evtag_marshal_kill(struct evbuffer *, uint32_t,
+    const struct kill *);
+int evtag_unmarshal_kill(struct evbuffer *, uint32_t,
+    struct kill *);
+int kill_weapon_assign(struct kill *, const char *);
+int kill_weapon_get(struct kill *, char * *);
+int kill_action_assign(struct kill *, const char *);
+int kill_action_get(struct kill *, char * *);
+int kill_how_often_assign(struct kill *, const uint32_t);
+int kill_how_often_get(struct kill *, uint32_t *);
+/* --- kill done --- */
+
+/* Tag definition for run */
+enum run_ {
+  RUN_HOW=1,
+  RUN_SOME_BYTES=2,
+  RUN_FIXED_BYTES=3,
+  RUN_MAX_TAGS
+};
+
+/* Structure declaration for run */
+struct run_access_ {
+  int (*how_assign)(struct run *, const char *);
+  int (*how_get)(struct run *, char * *);
+  int (*some_bytes_assign)(struct run *, const uint8_t *, uint32_t);
+  int (*some_bytes_get)(struct run *, uint8_t * *, uint32_t *);
+  int (*fixed_bytes_assign)(struct run *, const uint8_t *);
+  int (*fixed_bytes_get)(struct run *, uint8_t **);
+};
+
+struct run {
+  struct run_access_ *base;
+
+  char *how_data;
+  uint8_t *some_bytes_data;
+  uint32_t some_bytes_length;
+  uint8_t fixed_bytes_data[24]; /* fixed length from "bytes fixed_bytes[24]" */
+
+  uint8_t how_set;
+  uint8_t some_bytes_set;
+  uint8_t fixed_bytes_set;
+};
+
+struct run *run_new(void);
+void run_free(struct run *);
+void run_clear(struct run *);
+void run_marshal(struct evbuffer *, const struct run *);
+int run_unmarshal(struct run *, struct evbuffer *);
+int run_complete(struct run *);
+void evtag_marshal_run(struct evbuffer *, uint32_t,
+    const struct run *);
+int evtag_unmarshal_run(struct evbuffer *, uint32_t,
+    struct run *);
+int run_how_assign(struct run *, const char *);
+int run_how_get(struct run *, char * *);
+int run_some_bytes_assign(struct run *, const uint8_t *, uint32_t);
+int run_some_bytes_get(struct run *, uint8_t * *, uint32_t *);
+int run_fixed_bytes_assign(struct run *, const uint8_t *);
+int run_fixed_bytes_get(struct run *, uint8_t **);
+/* --- run done --- */
+
+#endif /* ___REGRESS_RPC_ */
diff --git a/libevent/test/regress.h b/libevent/test/regress.h
new file mode 100644
index 00000000000..4060ff5c6ac
--- /dev/null
+++ b/libevent/test/regress.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _REGRESS_H_
+#define _REGRESS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Entry points for the libevent regression sub-suites; each runs its
+ * tests synchronously and exits the process on failure. */
+void http_suite(void);
+void http_basic_test(void);
+
+void rpc_suite(void);
+
+void dns_suite(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _REGRESS_H_ */
diff --git a/libevent/test/regress.rpc b/libevent/test/regress.rpc
new file mode 100644
index 00000000000..65ca95de4cf
--- /dev/null
+++ b/libevent/test/regress.rpc
@@ -0,0 +1,20 @@
+/* tests data packing and unpacking */
+/* Input spec for the event_rpcgen code generator; regress.gen.c/h are
+ * produced from this file.  Tag numbers become the wire identifiers. */
+
+struct msg {
+  string from_name = 1;
+  string to_name = 2;
+  optional struct[kill] attack = 3;
+  array struct[run] run = 4;
+}
+
+struct kill {
+  /* deliberately large tag to exercise multi-byte tag encoding */
+  string weapon = 0x10121;
+  string action = 2;
+  optional int how_often = 3;
+}
+
+struct run {
+  string how = 1;
+  optional bytes some_bytes = 2;
+  bytes fixed_bytes[24] = 3;
+}
diff --git a/libevent/test/regress_dns.c b/libevent/test/regress_dns.c
new file mode 100644
index 00000000000..129cdad498f
--- /dev/null
+++ b/libevent/test/regress_dns.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#endif
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evdns.h"
+#include "log.h"
+
+static int dns_ok = 0;
+static int dns_err = 0;
+
+void dns_suite(void);
+
+/* Shared evdns callback for the forward/reverse lookup tests.  Records
+ * the answer type in dns_ok (0 on failure), a timeout in dns_err, and
+ * always stops the event loop so the calling test can inspect them. */
+static void
+dns_gethostbyname_cb(int result, char type, int count, int ttl,
+    void *addresses, void *arg)
+{
+	dns_ok = dns_err = 0;
+
+	if (result == DNS_ERR_TIMEOUT) {
+		fprintf(stdout, "[Timed out] ");
+		dns_err = result;
+		goto out;
+	}
+
+	if (result != DNS_ERR_NONE) {
+		fprintf(stdout, "[Error code %d] ", result);
+		goto out;
+	}
+
+	fprintf(stderr, "type: %d, count: %d, ttl: %d: ", type, count, ttl);
+
+	switch (type) {
+	case DNS_IPv6_AAAA: {
+#if defined(HAVE_STRUCT_IN6_ADDR) && defined(HAVE_INET_NTOP) && defined(INET6_ADDRSTRLEN)
+		struct in6_addr *in6_addrs = addresses;
+		char buf[INET6_ADDRSTRLEN+1];
+		int i;
+		/* a resolution that's not valid does not help */
+		if (ttl < 0)
+			goto out;
+		for (i = 0; i < count; ++i) {
+			const char *b = inet_ntop(AF_INET6, &in6_addrs[i], buf,sizeof(buf));
+			if (b)
+				fprintf(stderr, "%s ", b);
+			else
+				fprintf(stderr, "%s ", strerror(errno));
+		}
+#endif
+		break;
+	}
+	case DNS_IPv4_A: {
+		struct in_addr *in_addrs = addresses;
+		int i;
+		/* a resolution that's not valid does not help */
+		if (ttl < 0)
+			goto out;
+		for (i = 0; i < count; ++i)
+			fprintf(stderr, "%s ", inet_ntoa(in_addrs[i]));
+		break;
+	}
+	case DNS_PTR:
+		/* may get at most one PTR */
+		if (count != 1)
+			goto out;
+
+		fprintf(stderr, "%s ", *(char **)addresses);
+		break;
+	default:
+		goto out;
+	}
+
+	/* success: remember which record type we decoded */
+	dns_ok = type;
+
+out:
+	event_loopexit(NULL);
+}
+
+/* Forward IPv4 lookup against a live resolver; exits the process on
+ * failure.  NOTE(review): depends on external DNS for www.monkey.org. */
+static void
+dns_gethostbyname(void)
+{
+	fprintf(stdout, "Simple DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_ipv4("www.monkey.org", 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_IPv4_A) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+}
+
+/* Forward IPv6 (AAAA) lookup; a timeout is tolerated as SKIPPED since
+ * many test hosts lack IPv6 connectivity, any other failure aborts. */
+static void
+dns_gethostbyname6(void)
+{
+	fprintf(stdout, "IPv6 DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_ipv6("www.ietf.org", 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_IPv6_AAAA) {
+		fprintf(stdout, "OK\n");
+	} else if (!dns_ok && dns_err == DNS_ERR_TIMEOUT) {
+		fprintf(stdout, "SKIPPED\n");
+	} else {
+		fprintf(stdout, "FAILED (%d)\n", dns_ok);
+		exit(1);
+	}
+}
+
+/* Reverse (PTR) lookup of 127.0.0.1; exits the process on failure. */
+static void
+dns_gethostbyaddr(void)
+{
+	struct in_addr in;
+	in.s_addr = htonl(0x7f000001ul); /* 127.0.0.1 */
+	fprintf(stdout, "Simple reverse DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_reverse(&in, 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_PTR) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+}
+
+/* Counts replies seen by dns_server_gethostbyname_cb; the loop exits
+ * after the third one. */
+static int n_server_responses = 0;
+
+/* Server-side handler for the in-process test nameserver: answers the
+ * three expected questions (A/AAAA for zz.example.com, PTR for
+ * 192.168.11.11) with fixed records and clears dns_ok on anything
+ * unexpected or on a failed send. */
+static void
+dns_server_request_cb(struct evdns_server_request *req, void *data)
+{
+	int i, r;
+	const char TEST_ARPA[] = "11.11.168.192.in-addr.arpa";
+	for (i = 0; i < req->nquestions; ++i) {
+		struct in_addr ans;
+		ans.s_addr = htonl(0xc0a80b0bUL); /* 192.168.11.11 */
+		if (req->questions[i]->type == EVDNS_TYPE_A &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, "zz.example.com")) {
+			r = evdns_server_request_add_a_reply(req, "zz.example.com",
+			    1, &ans.s_addr, 12345);
+			if (r<0)
+				dns_ok = 0;
+		} else if (req->questions[i]->type == EVDNS_TYPE_AAAA &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, "zz.example.com")) {
+			char addr6[17] = "abcdefghijklmnop";
+			r = evdns_server_request_add_aaaa_reply(req, "zz.example.com",
+			    1, addr6, 123);
+			if (r<0)
+				dns_ok = 0;
+		} else if (req->questions[i]->type == EVDNS_TYPE_PTR &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, TEST_ARPA)) {
+			r = evdns_server_request_add_ptr_reply(req, NULL, TEST_ARPA,
+			    "ZZ.EXAMPLE.COM", 54321);
+			if (r<0)
+				dns_ok = 0;
+		} else {
+			fprintf(stdout, "Unexpected question %d %d \"%s\" ",
+				req->questions[i]->type,
+				req->questions[i]->dns_question_class,
+				req->questions[i]->name);
+			dns_ok = 0;
+		}
+	}
+	r = evdns_server_request_respond(req, 0);
+	if (r<0) {
+		fprintf(stdout, "Couldn't send reply. ");
+		dns_ok = 0;
+	}
+}
+
+/* Client-side callback for the test nameserver: verifies each answer
+ * (address bytes and TTL) against the fixed records served by
+ * dns_server_request_cb, clearing dns_ok on any mismatch.  Exits the
+ * event loop once all three expected responses have arrived. */
+static void
+dns_server_gethostbyname_cb(int result, char type, int count, int ttl,
+    void *addresses, void *arg)
+{
+	if (result != DNS_ERR_NONE) {
+		fprintf(stdout, "Unexpected result %d. ", result);
+		dns_ok = 0;
+		goto out;
+	}
+	if (count != 1) {
+		fprintf(stdout, "Unexpected answer count %d. ", count);
+		dns_ok = 0;
+		goto out;
+	}
+	switch (type) {
+	case DNS_IPv4_A: {
+		struct in_addr *in_addrs = addresses;
+		if (in_addrs[0].s_addr != htonl(0xc0a80b0bUL) || ttl != 12345) {
+			fprintf(stdout, "Bad IPv4 response \"%s\" %d. ",
+					inet_ntoa(in_addrs[0]), ttl);
+			dns_ok = 0;
+			goto out;
+		}
+		break;
+	}
+	case DNS_IPv6_AAAA: {
+#if defined (HAVE_STRUCT_IN6_ADDR) && defined(HAVE_INET_NTOP) && defined(INET6_ADDRSTRLEN)
+		struct in6_addr *in6_addrs = addresses;
+		char buf[INET6_ADDRSTRLEN+1];
+		if (memcmp(&in6_addrs[0].s6_addr, "abcdefghijklmnop", 16)
+		    || ttl != 123) {
+			const char *b = inet_ntop(AF_INET6, &in6_addrs[0],buf,sizeof(buf));
+			fprintf(stdout, "Bad IPv6 response \"%s\" %d. ", b, ttl);
+			dns_ok = 0;
+			goto out;
+		}
+#endif
+		break;
+	}
+	case DNS_PTR: {
+		char **addrs = addresses;
+		if (strcmp(addrs[0], "ZZ.EXAMPLE.COM") || ttl != 54321) {
+			fprintf(stdout, "Bad PTR response \"%s\" %d. ",
+					addrs[0], ttl);
+			dns_ok = 0;
+			goto out;
+		}
+		break;
+	}
+	default:
+		fprintf(stdout, "Bad response type %d. ", type);
+		dns_ok = 0;
+	}
+
+ out:
+	if (++n_server_responses == 3) {
+		event_loopexit(NULL);
+	}
+}
+
+/* End-to-end test of the evdns server API: binds a UDP nameserver on
+ * 127.0.0.1:35353, points the resolver at it, issues A/AAAA/PTR
+ * queries, and checks the canned answers round-trip.  Exits the
+ * process on failure; tears the server and resolver back down on
+ * success. */
+static void
+dns_server(void)
+{
+	int sock;
+	struct sockaddr_in my_addr;
+	struct evdns_server_port *port;
+	struct in_addr resolve_addr;
+
+	dns_ok = 1;
+	fprintf(stdout, "DNS server support: ");
+
+	/* Add ourself as the only nameserver, and make sure we really are
+	 * the only nameserver. */
+	evdns_nameserver_ip_add("127.0.0.1:35353");
+	if (evdns_count_nameservers() != 1) {
+		fprintf(stdout, "Couldn't set up.\n");
+		exit(1);
+	}
+
+	/* Now configure a nameserver port. */
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+#ifdef WIN32
+	{
+		u_long nonblocking = 1;
+		ioctlsocket(sock, FIONBIO, &nonblocking);
+	}
+#else
+	fcntl(sock, F_SETFL, O_NONBLOCK);
+#endif
+	memset(&my_addr, 0, sizeof(my_addr));
+	my_addr.sin_family = AF_INET;
+	my_addr.sin_port = htons(35353);
+	my_addr.sin_addr.s_addr = htonl(0x7f000001UL);
+	if (bind(sock, (struct sockaddr*)&my_addr, sizeof(my_addr)) < 0) {
+		perror("bind");
+		exit (1);
+	}
+	port = evdns_add_server_port(sock, 0, dns_server_request_cb, NULL);
+
+	/* Send two queries. */
+	evdns_resolve_ipv4("zz.example.com", DNS_QUERY_NO_SEARCH,
+					   dns_server_gethostbyname_cb, NULL);
+	evdns_resolve_ipv6("zz.example.com", DNS_QUERY_NO_SEARCH,
+					   dns_server_gethostbyname_cb, NULL);
+	resolve_addr.s_addr = htonl(0xc0a80b0bUL); /* 192.168.11.11 */
+	evdns_resolve_reverse(&resolve_addr, 0,
+	    dns_server_gethostbyname_cb, NULL);
+
+	event_dispatch();
+
+	if (dns_ok) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	evdns_close_server_port(port);
+	evdns_shutdown(0); /* remove ourself as nameserver. */
+#ifdef WIN32
+	closesocket(sock);
+#else
+	close(sock);
+#endif
+}
+
+/* Run all DNS regression tests: the local-server test first (before
+ * evdns_init installs system nameservers), then the live lookups. */
+void
+dns_suite(void)
+{
+	dns_server(); /* Do this before we call evdns_init. */
+
+	evdns_init();
+	dns_gethostbyname();
+	dns_gethostbyname6();
+	dns_gethostbyaddr();
+
+	evdns_shutdown(0);
+}
diff --git a/libevent/test/regress_http.c b/libevent/test/regress_http.c
new file mode 100644
index 00000000000..1e2a1eb062a
--- /dev/null
+++ b/libevent/test/regress_http.c
@@ -0,0 +1,1476 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evhttp.h"
+#include "log.h"
+#include "http-internal.h"
+
+extern int pair[];
+extern int test_ok;
+
+static struct evhttp *http;
+/* set if a test needs to call loopexit on a base */
+static struct event_base *base;
+
+void http_suite(void);
+
+void http_basic_cb(struct evhttp_request *req, void *arg);
+static void http_chunked_cb(struct evhttp_request *req, void *arg);
+void http_post_cb(struct evhttp_request *req, void *arg);
+void http_dispatcher_cb(struct evhttp_request *req, void *arg);
+static void http_large_delay_cb(struct evhttp_request *req, void *arg);
+
+/* Create an evhttp server bound to 127.0.0.1 on the first free port in
+ * [8080, 8129], register the test URI callbacks, and return it.  The
+ * chosen port is stored through *pport; fatal error if no port binds. */
+static struct evhttp *
+http_setup(short *pport, struct event_base *base)
+{
+	int i;
+	struct evhttp *myhttp;
+	short port = -1;
+
+	/* Try a few different ports */
+	myhttp = evhttp_new(base);
+	for (i = 0; i < 50; ++i) {
+		if (evhttp_bind_socket(myhttp, "127.0.0.1", 8080 + i) != -1) {
+			port = 8080 + i;
+			break;
+		}
+	}
+
+	if (port == -1)
+		event_errx(1, "Could not start web server");
+
+	/* Register a callback for certain types of requests */
+	evhttp_set_cb(myhttp, "/test", http_basic_cb, NULL);
+	evhttp_set_cb(myhttp, "/chunked", http_chunked_cb, NULL);
+	evhttp_set_cb(myhttp, "/postit", http_post_cb, NULL);
+	evhttp_set_cb(myhttp, "/largedelay", http_large_delay_cb, NULL);
+	evhttp_set_cb(myhttp, "/", http_dispatcher_cb, NULL);
+
+	*pport = port;
+	return (myhttp);
+}
+
+#ifndef NI_MAXSERV
+#define NI_MAXSERV 1024
+#endif
+
+/* Open a blocking TCP connection to address:port and return the socket
+ * descriptor.  Uses gethostbyname on Windows and getaddrinfo elsewhere;
+ * any failure to connect is fatal (event_err exits). */
+static int
+http_connect(const char *address, u_short port)
+{
+	/* Stupid code for connecting */
+#ifdef WIN32
+	struct hostent *he;
+	struct sockaddr_in sin;
+#else
+	struct addrinfo ai, *aitop;
+	char strport[NI_MAXSERV];
+#endif
+	struct sockaddr *sa;
+	int slen;
+	int fd;
+
+#ifdef WIN32
+	if (!(he = gethostbyname(address))) {
+		event_warn("gethostbyname");
+	}
+	memcpy(&sin.sin_addr, he->h_addr_list[0], he->h_length);
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+	slen = sizeof(struct sockaddr_in);
+	sa = (struct sockaddr*)&sin;
+#else
+	memset(&ai, 0, sizeof (ai));
+	ai.ai_family = AF_INET;
+	ai.ai_socktype = SOCK_STREAM;
+	snprintf(strport, sizeof (strport), "%d", port);
+	if (getaddrinfo(address, strport, &ai, &aitop) != 0) {
+		event_warn("getaddrinfo");
+		return (-1);
+	}
+	sa = aitop->ai_addr;
+	slen = aitop->ai_addrlen;
+#endif
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd == -1)
+		event_err(1, "socket failed");
+
+	if (connect(fd, sa, slen) == -1)
+		event_err(1, "connect failed");
+
+#ifndef WIN32
+	freeaddrinfo(aitop);
+#endif
+
+	return (fd);
+}
+
+/* bufferevent read callback for the raw-socket HTTP tests: once the
+ * expected body text appears in the input, parse the buffered response
+ * line and headers, bump test_ok when a Content-Type header is present,
+ * and stop the event loop. */
+static void
+http_readcb(struct bufferevent *bev, void *arg)
+{
+	const char *what = "This is funny";
+
+	event_debug(("%s: %s\n", __func__, EVBUFFER_DATA(bev->input)));
+
+	if (evbuffer_find(bev->input,
+		(const unsigned char*) what, strlen(what)) != NULL) {
+		struct evhttp_request *req = evhttp_request_new(NULL, NULL);
+		enum message_read_status done;
+
+		req->kind = EVHTTP_RESPONSE;
+		done = evhttp_parse_firstline(req, bev->input);
+		if (done != ALL_DATA_READ)
+			goto out;
+
+		done = evhttp_parse_headers(req, bev->input);
+		if (done != ALL_DATA_READ)
+			goto out;
+
+		/* NOTE(review): at this point done == ALL_DATA_READ, so the
+		 * "done == 1" comparison below relies on ALL_DATA_READ's enum
+		 * value -- comparing against the enum would be clearer. */
+		if (done == 1 &&
+		    evhttp_find_header(req->input_headers,
+			"Content-Type") != NULL)
+			test_ok++;
+
+	 out:
+		evhttp_request_free(req);
+		bufferevent_disable(bev, EV_READ);
+		if (base)
+			event_base_loopexit(base, NULL);
+		else
+			event_loopexit(NULL);
+	}
+}
+
+/* bufferevent write callback: once the request has fully drained,
+ * switch the connection to reading the reply and count a success. */
+static void
+http_writecb(struct bufferevent *bev, void *arg)
+{
+	if (EVBUFFER_LENGTH(bev->output) == 0) {
+		/* enable reading of the reply */
+		bufferevent_enable(bev, EV_READ);
+		test_ok++;
+	}
+}
+
+/* bufferevent error callback: flag the test as failed (-2) and stop
+ * the event loop. */
+static void
+http_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+	test_ok = -2;
+	event_loopexit(NULL);
+}
+
+/* Server handler for "/test": replies "This is funny" (or an empty
+ * body when an "Empty" request header is present), scores the
+ * multi-line-header test via X-multi/X-Last, and can inject a bogus
+ * negative Content-Length when asked to via X-Negative. */
+void
+http_basic_cb(struct evhttp_request *req, void *arg)
+{
+	struct evbuffer *evb = evbuffer_new();
+	int empty = evhttp_find_header(req->input_headers, "Empty") != NULL;
+	event_debug(("%s: called\n", __func__));
+	evbuffer_add_printf(evb, "This is funny");
+
+	/* For multi-line headers test */
+	{
+		const char *multi =
+		    evhttp_find_header(req->input_headers,"X-multi");
+		if (multi) {
+			if (strcmp("END", multi + strlen(multi) - 3) == 0)
+				test_ok++;
+			if (evhttp_find_header(req->input_headers, "X-Last"))
+				test_ok++;
+		}
+	}
+
+	/* injecting a bad content-length */
+	if (evhttp_find_header(req->input_headers, "X-Negative"))
+		evhttp_add_header(req->output_headers,
+		    "Content-Length", "-100");
+
+	/* allow sending of an empty reply */
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine",
+	    !empty ? evb : NULL);
+
+	evbuffer_free(evb);
+}
+
+/* The chunk bodies the chunked-reply test trickles out, one per timer
+ * tick. */
+static char const* const CHUNKS[] = {
+	"This is funny",
+	"but not hilarious.",
+	"bwv 1052"
+};
+
+/* Per-request progress for the chunked reply: the request being served
+ * and the index of the next chunk to send. */
+struct chunk_req_state {
+	struct evhttp_request *req;
+	int i;
+};
+
+/* Timer callback: send CHUNKS[i] as one reply chunk; reschedule itself
+ * until all chunks are out, then end the reply and free the state. */
+static void
+http_chunked_trickle_cb(int fd, short events, void *arg)
+{
+	struct evbuffer *evb = evbuffer_new();
+	struct chunk_req_state *state = arg;
+	struct timeval when = { 0, 0 };
+
+	evbuffer_add_printf(evb, "%s", CHUNKS[state->i]);
+	evhttp_send_reply_chunk(state->req, evb);
+	evbuffer_free(evb);
+
+	if (++state->i < sizeof(CHUNKS)/sizeof(CHUNKS[0])) {
+		event_once(-1, EV_TIMEOUT,
+		    http_chunked_trickle_cb, state, &when);
+	} else {
+		evhttp_send_reply_end(state->req);
+		free(state);
+	}
+}
+
+/* Server handler for "/chunked": start a chunked reply and hand off to
+ * http_chunked_trickle_cb so chunks arrive over several loop
+ * iterations rather than all at once.
+ * NOTE(review): the malloc result is used unchecked -- test-only code,
+ * but a NULL check would be cheap. */
+static void
+http_chunked_cb(struct evhttp_request *req, void *arg)
+{
+	struct timeval when = { 0, 0 };
+	struct chunk_req_state *state = malloc(sizeof(struct chunk_req_state));
+	event_debug(("%s: called\n", __func__));
+
+	memset(state, 0, sizeof(struct chunk_req_state));
+	state->req = req;
+
+	/* generate a chunked reply */
+	evhttp_send_reply_start(req, HTTP_OK, "Everything is fine");
+
+	/* but trickle it across several iterations to ensure we're not
+	 * assuming it comes all at once */
+	event_once(-1, EV_TIMEOUT, http_chunked_trickle_cb, state, &when);
+}
+
+/* Timer callback: write the second half of the split request started
+ * in http_basic_test ("Host: some" + "host\r\n...") to exercise the
+ * server's incremental request parsing. */
+static void
+http_complete_write(int fd, short what, void *arg)
+{
+	struct bufferevent *bev = arg;
+	const char *http_request = "host\r\n"
+	    "Connection: close\r\n"
+	    "\r\n";
+	bufferevent_write(bev, http_request, strlen(http_request));
+}
+
+/* Basic HTTP server test: start a server on two ports, send one
+ * request split across two writes (exercising partial parsing), then a
+ * complete request on the second port, and verify via test_ok counts
+ * accumulated by the read/write callbacks.  Exits the process on
+ * failure. */
+static void
+http_basic_test(void)
+{
+	struct timeval tv;
+	struct bufferevent *bev;
+	int fd;
+	const char *http_request;
+	short port = -1;
+
+	test_ok = 0;
+	fprintf(stdout, "Testing Basic HTTP Server: ");
+
+	http = http_setup(&port, NULL);
+
+	/* bind to a second socket */
+	if (evhttp_bind_socket(http, "127.0.0.1", port + 1) == -1) {
+		fprintf(stdout, "FAILED (bind)\n");
+		exit(1);
+	}
+
+	fd = http_connect("127.0.0.1", port);
+
+	/* Stupid thing to send a request */
+	bev = bufferevent_new(fd, http_readcb, http_writecb,
+	    http_errorcb, NULL);
+
+	/* first half of the http request */
+	http_request =
+	    "GET /test HTTP/1.1\r\n"
+	    "Host: some";
+
+	bufferevent_write(bev, http_request, strlen(http_request));
+	timerclear(&tv);
+	tv.tv_usec = 10000;	/* finish the request 10ms later */
+	event_once(-1, EV_TIMEOUT, http_complete_write, bev, &tv);
+
+	event_dispatch();
+
+	if (test_ok != 3) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/* connect to the second port */
+	bufferevent_free(bev);
+	EVUTIL_CLOSESOCKET(fd);
+
+	fd = http_connect("127.0.0.1", port + 1);
+
+	/* Stupid thing to send a request */
+	bev = bufferevent_new(fd, http_readcb, http_writecb,
+	    http_errorcb, NULL);
+
+	http_request =
+	    "GET /test HTTP/1.1\r\n"
+	    "Host: somehost\r\n"
+	    "Connection: close\r\n"
+	    "\r\n";
+
+	bufferevent_write(bev, http_request, strlen(http_request));
+
+	event_dispatch();
+
+	bufferevent_free(bev);
+	EVUTIL_CLOSESOCKET(fd);
+
+	evhttp_free(http);
+
+	if (test_ok != 5) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	fprintf(stdout, "OK\n");
+}
+
+/* Client connection that http_large_delay_cb deliberately fails; set
+ * by the test that drives the /largedelay handler. */
+static struct evhttp_connection *delayed_client;
+
+/* Timer callback: send the (intentionally late) OK reply for the
+ * /largedelay request and count it. */
+static void
+http_delay_reply(int fd, short what, void *arg)
+{
+	struct evhttp_request *req = arg;
+
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine", NULL);
+
+	++test_ok;
+}
+
+/* Server handler for "/largedelay": schedule the reply 3 seconds out,
+ * then immediately fail the client connection so the test can observe
+ * an EOF while a reply is still pending. */
+static void
+http_large_delay_cb(struct evhttp_request *req, void *arg)
+{
+	struct timeval tv;
+	timerclear(&tv);
+	tv.tv_sec = 3;
+
+	event_once(-1, EV_TIMEOUT, http_delay_reply, req, &tv);
+
+	/* here we close the client connection which will cause an EOF */
+	evhttp_connection_fail(delayed_client, EVCON_HTTP_EOF);
+}
+
+void http_request_done(struct evhttp_request *, void *);
+void http_request_empty_done(struct evhttp_request *, void *);
+
+/* Pipeline three requests over one evhttp_connection: two normal GETs
+ * (optionally with "Connection: close" when !persistent) and one
+ * empty-body request, verifying each completion via test_ok.  Request
+ * ownership passes to the connection on evhttp_make_request.  Exits
+ * the process on failure. */
+static void
+http_connection_test(int persistent)
+{
+	short port = -1;
+	struct evhttp_connection *evcon = NULL;
+	struct evhttp_request *req = NULL;
+
+	test_ok = 0;
+	fprintf(stdout, "Testing Request Connection Pipeline %s: ",
+	    persistent ? "(persistent)" : "");
+
+	http = http_setup(&port, NULL);
+
+	evcon = evhttp_connection_new("127.0.0.1", port);
+	if (evcon == NULL) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/*
+	 * At this point, we want to schedule a request to the HTTP
+	 * server using our make request method.
+	 */
+
+	req = evhttp_request_new(http_request_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Host", "somehost");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	if (test_ok != 1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/* try to make another request over the same connection */
+	test_ok = 0;
+
+	req = evhttp_request_new(http_request_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Host", "somehost");
+
+	/*
+	 * if our connections are not supposed to be persistent; request
+	 * a close from the server.
+	 */
+	if (!persistent)
+		evhttp_add_header(req->output_headers, "Connection", "close");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	/* make another request: request empty reply */
+	test_ok = 0;
+
+	req = evhttp_request_new(http_request_empty_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Empty", "itis");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	if (test_ok != 1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	evhttp_connection_free(evcon);
+	evhttp_free(http);
+
+	fprintf(stdout, "OK\n");
+}
+
+/* Completion callback for the /test GETs: require HTTP 200, a
+ * Content-Type header, and an exact "This is funny" body, then set
+ * test_ok and stop the loop.  Exits the process on any mismatch. */
+void
+http_request_done(struct evhttp_request *req, void *arg)
+{
+	const char *what = "This is funny";
+
+	if (req->response_code != HTTP_OK) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	test_ok = 1;
+	event_loopexit(NULL);
+}
+
+/* test date header and content length */
+
+/* Completion callback for the empty-reply request: require HTTP 200,
+ * a Date header, "Content-Length: 0", and an empty body, then set
+ * test_ok and stop the loop.  Exits the process on any mismatch. */
+void
+http_request_empty_done(struct evhttp_request *req, void *arg)
+{
+	if (req->response_code != HTTP_OK) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (evhttp_find_header(req->input_headers, "Date") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+
+	if (evhttp_find_header(req->input_headers, "Content-Length") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (strcmp(evhttp_find_header(req->input_headers, "Content-Length"),
+		"0")) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (EVBUFFER_LENGTH(req->input_buffer) != 0) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	test_ok = 1;
+	event_loopexit(NULL);
+}
+
+/*
+ * HTTP DISPATCHER test
+ */
+
+/* Catch-all server handler ("/"): always replies 200 with the body
+ * "DISPATCHER_TEST", regardless of the query string. */
+void
+http_dispatcher_cb(struct evhttp_request *req, void *arg)
+{
+
+	struct evbuffer *evb = evbuffer_new();
+	event_debug(("%s: called\n", __func__));
+	evbuffer_add_printf(evb, "DISPATCHER_TEST");
+
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine", evb);
+
+	evbuffer_free(evb);
+}
+
+static void
+http_dispatcher_test_done(struct evhttp_request *req, void *arg)
+{
+ const char *what = "DISPATCHER_TEST";
+
+ if (req->response_code != HTTP_OK) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+ fprintf(stderr, "FAILED (content type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+ fprintf(stderr, "FAILED (length %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(what));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+ fprintf(stderr, "FAILED (data)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_dispatcher_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Dispatcher: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* also bind to local host */
+ evhttp_connection_set_local_address(evcon, "127.0.0.1");
+
+ /*
+	 * At this point, we want to schedule an HTTP GET request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_dispatcher_test_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/?arg=val") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED: %d\n", test_ok);
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+/*
+ * HTTP POST test.
+ */
+
+void http_postrequest_done(struct evhttp_request *, void *);
+
+#define POST_DATA "Okay. Not really printf"
+
+static void
+http_post_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP POST Request: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_postrequest_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+ evbuffer_add_printf(req->output_buffer, POST_DATA);
+
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_POST, "/postit") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED: %d\n", test_ok);
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+void
+http_post_cb(struct evhttp_request *req, void *arg)
+{
+ struct evbuffer *evb;
+ event_debug(("%s: called\n", __func__));
+
+ /* Yes, we are expecting a post request */
+ if (req->type != EVHTTP_REQ_POST) {
+ fprintf(stdout, "FAILED (post type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(POST_DATA)) {
+ fprintf(stdout, "FAILED (length: %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(POST_DATA));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), POST_DATA,
+ strlen(POST_DATA))) {
+ fprintf(stdout, "FAILED (data)\n");
+ fprintf(stdout, "Got :%s\n", EVBUFFER_DATA(req->input_buffer));
+ fprintf(stdout, "Want:%s\n", POST_DATA);
+ exit(1);
+ }
+
+ evb = evbuffer_new();
+ evbuffer_add_printf(evb, "This is funny");
+
+ evhttp_send_reply(req, HTTP_OK, "Everything is fine", evb);
+
+ evbuffer_free(evb);
+}
+
+void
+http_postrequest_done(struct evhttp_request *req, void *arg)
+{
+ const char *what = "This is funny";
+
+ if (req == NULL) {
+ fprintf(stderr, "FAILED (timeout)\n");
+ exit(1);
+ }
+
+ if (req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+ fprintf(stderr, "FAILED (content type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+ fprintf(stderr, "FAILED (length %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(what));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+ fprintf(stderr, "FAILED (data)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_failure_readcb(struct bufferevent *bev, void *arg)
+{
+ const char *what = "400 Bad Request";
+ if (evbuffer_find(bev->input, (const unsigned char*) what, strlen(what)) != NULL) {
+ test_ok = 2;
+ bufferevent_disable(bev, EV_READ);
+ event_loopexit(NULL);
+ }
+}
+
+/*
+ * Testing that the HTTP server can deal with a malformed request.
+ */
+static void
+http_failure_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Bad HTTP Request: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_failure_readcb, http_writecb,
+ http_errorcb, NULL);
+
+ http_request = "illegal request\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ event_dispatch();
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+close_detect_done(struct evhttp_request *req, void *arg)
+{
+ struct timeval tv;
+ if (req == NULL || req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+
+ timerclear(&tv);
+ tv.tv_sec = 3; /* longer than the http time out */
+
+ event_loopexit(&tv);
+}
+
+static void
+close_detect_launch(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct evhttp_request *req;
+
+ req = evhttp_request_new(close_detect_done, NULL);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+}
+
+static void
+close_detect_cb(struct evhttp_request *req, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct timeval tv;
+
+ if (req != NULL && req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ timerclear(&tv);
+ tv.tv_sec = 3; /* longer than the http time out */
+
+	/* launch a new request on the persistent connection in 3 seconds */
+ event_once(-1, EV_TIMEOUT, close_detect_launch, evcon, &tv);
+}
+
+
+static void
+http_close_detection(int with_delay)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Connection Close Detection%s: ",
+ with_delay ? " (with delay)" : "");
+
+ http = http_setup(&port, NULL);
+
+ /* 2 second timeout */
+ evhttp_set_timeout(http, 2);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ delayed_client = evcon;
+
+ /*
+ * At this point, we want to schedule a request to the HTTP
+ * server using our make request method.
+ */
+
+ req = evhttp_request_new(close_detect_cb, evcon);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon,
+ req, EVHTTP_REQ_GET, with_delay ? "/largedelay" : "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* at this point, the http server should have no connection */
+ if (TAILQ_FIRST(&http->connections) != NULL) {
+ fprintf(stdout, "FAILED (left connections)\n");
+ exit(1);
+ }
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_highport_test(void)
+{
+ int i = -1;
+ struct evhttp *myhttp = NULL;
+
+ fprintf(stdout, "Testing HTTP Server with high port: ");
+
+ /* Try a few different ports */
+ for (i = 0; i < 50; ++i) {
+ myhttp = evhttp_start("127.0.0.1", 65535 - i);
+ if (myhttp != NULL) {
+ fprintf(stdout, "OK\n");
+ evhttp_free(myhttp);
+ return;
+ }
+ }
+
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static void
+http_bad_header_test(void)
+{
+ struct evkeyvalq headers;
+
+ fprintf(stdout, "Testing HTTP Header filtering: ");
+
+ TAILQ_INIT(&headers);
+
+ if (evhttp_add_header(&headers, "One", "Two") != 0)
+ goto fail;
+
+ if (evhttp_add_header(&headers, "One\r", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two") != 0)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\r\n Three") != 0)
+ goto fail;
+ if (evhttp_add_header(&headers, "One\r", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One\n", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\r") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\n") != -1)
+ goto fail;
+
+ evhttp_clear_headers(&headers);
+
+ fprintf(stdout, "OK\n");
+ return;
+fail:
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static int validate_header(
+ const struct evkeyvalq* headers,
+ const char *key, const char *value)
+{
+ const char *real_val = evhttp_find_header(headers, key);
+ if (real_val == NULL)
+ return (-1);
+ if (strcmp(real_val, value) != 0)
+ return (-1);
+ return (0);
+}
+
+static void
+http_parse_query_test(void)
+{
+ struct evkeyvalq headers;
+
+ fprintf(stdout, "Testing HTTP query parsing: ");
+
+ TAILQ_INIT(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test", &headers);
+ if (validate_header(&headers, "q", "test") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test&foo=bar", &headers);
+ if (validate_header(&headers, "q", "test") != 0)
+ goto fail;
+ if (validate_header(&headers, "foo", "bar") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test+foo", &headers);
+ if (validate_header(&headers, "q", "test foo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test%0Afoo", &headers);
+ if (validate_header(&headers, "q", "test\nfoo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test%0Dfoo", &headers);
+ if (validate_header(&headers, "q", "test\rfoo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ fprintf(stdout, "OK\n");
+ return;
+fail:
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static void
+http_base_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Server Event Base: ");
+
+ base = event_init();
+
+ /*
+	 * create another bogus base - which is being used by all subsequent
+ * tests - yuck!
+ */
+ event_init();
+
+ http = http_setup(&port, base);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_readcb, http_writecb,
+ http_errorcb, NULL);
+ bufferevent_base_set(base, bev);
+
+ http_request =
+ "GET /test HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ event_base_dispatch(base);
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ event_base_free(base);
+ base = NULL;
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+/*
+ * the server is going to reply with chunked data.
+ */
+
+static void
+http_chunked_readcb(struct bufferevent *bev, void *arg)
+{
+ /* nothing here */
+}
+
+static void
+http_chunked_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ if (!test_ok)
+ goto out;
+
+ test_ok = -1;
+
+ if ((what & EVBUFFER_EOF) != 0) {
+ struct evhttp_request *req = evhttp_request_new(NULL, NULL);
+ const char *header;
+ enum message_read_status done;
+
+ req->kind = EVHTTP_RESPONSE;
+ done = evhttp_parse_firstline(req, EVBUFFER_INPUT(bev));
+ if (done != ALL_DATA_READ)
+ goto out;
+
+ done = evhttp_parse_headers(req, EVBUFFER_INPUT(bev));
+ if (done != ALL_DATA_READ)
+ goto out;
+
+ header = evhttp_find_header(req->input_headers, "Transfer-Encoding");
+ if (header == NULL || strcmp(header, "chunked"))
+ goto out;
+
+ header = evhttp_find_header(req->input_headers, "Connection");
+ if (header == NULL || strcmp(header, "close"))
+ goto out;
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 13 chars */
+ if (strcmp(header, "d"))
+ goto out;
+ free((char*)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "This is funny", 13))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 13 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 18 chars */
+ if (strcmp(header, "12"))
+ goto out;
+ free((char *)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "but not hilarious.", 18))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 18 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 8 chars */
+ if (strcmp(header, "8"))
+ goto out;
+ free((char *)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "bwv 1052.", 8))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 8 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 0 chars */
+ if (strcmp(header, "0"))
+ goto out;
+ free((char *)header);
+
+ test_ok = 2;
+ }
+
+out:
+ event_loopexit(NULL);
+}
+
+static void
+http_chunked_writecb(struct bufferevent *bev, void *arg)
+{
+ if (EVBUFFER_LENGTH(EVBUFFER_OUTPUT(bev)) == 0) {
+ /* enable reading of the reply */
+ bufferevent_enable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+http_chunked_request_done(struct evhttp_request *req, void *arg)
+{
+ if (req->response_code != HTTP_OK) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers,
+ "Transfer-Encoding") == NULL) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != 13 + 18 + 8) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (strncmp((char *)EVBUFFER_DATA(req->input_buffer),
+ "This is funnybut not hilarious.bwv 1052",
+ 13 + 18 + 8)) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_chunked_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+ struct timeval tv_start, tv_end;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+ int i;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Chunked HTTP Reply: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd,
+ http_chunked_readcb, http_chunked_writecb,
+ http_chunked_errorcb, NULL);
+
+ http_request =
+ "GET /chunked HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ evutil_gettimeofday(&tv_start, NULL);
+
+ event_dispatch();
+
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+
+ if (tv_end.tv_sec >= 1) {
+ fprintf(stdout, "FAILED (time)\n");
+ exit (1);
+ }
+
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* now try again with the regular connection object */
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* make two requests to check the keepalive behavior */
+ for (i = 0; i < 2; i++) {
+ test_ok = 0;
+ req = evhttp_request_new(http_chunked_request_done, NULL);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_GET, "/chunked") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+ }
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_multi_line_header_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_start_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Server with multi line: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_readcb, http_writecb,
+ http_errorcb, NULL);
+
+ http_start_request =
+ "GET /test HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "X-Multi: aaaaaaaa\r\n"
+ " a\r\n"
+ "\tEND\r\n"
+ "X-Last: last\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_start_request, strlen(http_start_request));
+
+ event_dispatch();
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ if (test_ok != 4) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_request_bad(struct evhttp_request *req, void *arg)
+{
+ if (req != NULL) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_negative_content_length_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Negative Content Length: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+ * At this point, we want to schedule a request to the HTTP
+ * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_request_bad, NULL);
+
+ /* Cause the response to have a negative content-length */
+ evhttp_add_header(req->output_headers, "X-Negative", "makeitso");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+void
+http_suite(void)
+{
+ http_base_test();
+ http_bad_header_test();
+ http_parse_query_test();
+ http_basic_test();
+ http_connection_test(0 /* not-persistent */);
+ http_connection_test(1 /* persistent */);
+	http_close_detection(0 /* without delay */);
+ http_close_detection(1 /* with delay */);
+ http_post_test();
+ http_failure_test();
+ http_highport_test();
+ http_dispatcher_test();
+
+ http_multi_line_header_test();
+ http_negative_content_length_test();
+
+ http_chunked_test();
+}
diff --git a/libevent/test/regress_rpc.c b/libevent/test/regress_rpc.c
new file mode 100644
index 00000000000..760934766a1
--- /dev/null
+++ b/libevent/test/regress_rpc.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "event.h"
+#include "evhttp.h"
+#include "log.h"
+#include "evrpc.h"
+
+#include "regress.gen.h"
+
+void rpc_suite(void);
+
+extern int test_ok;
+
+static struct evhttp *
+http_setup(short *pport)
+{
+ int i;
+ struct evhttp *myhttp;
+ short port = -1;
+
+ /* Try a few different ports */
+ for (i = 0; i < 50; ++i) {
+ myhttp = evhttp_start("127.0.0.1", 8080 + i);
+ if (myhttp != NULL) {
+ port = 8080 + i;
+ break;
+ }
+ }
+
+ if (port == -1)
+ event_errx(1, "Could not start web server");
+
+ *pport = port;
+ return (myhttp);
+}
+
+EVRPC_HEADER(Message, msg, kill);
+EVRPC_HEADER(NeverReply, msg, kill);
+
+EVRPC_GENERATE(Message, msg, kill);
+EVRPC_GENERATE(NeverReply, msg, kill);
+
+static int need_input_hook = 0;
+static int need_output_hook = 0;
+
+static void
+MessageCb(EVRPC_STRUCT(Message)* rpc, void *arg)
+{
+ struct kill* kill_reply = rpc->reply;
+
+ if (need_input_hook) {
+ struct evhttp_request* req = EVRPC_REQUEST_HTTP(rpc);
+ const char *header = evhttp_find_header(
+ req->input_headers, "X-Hook");
+ assert(strcmp(header, "input") == 0);
+ }
+
+ /* we just want to fill in some non-sense */
+ EVTAG_ASSIGN(kill_reply, weapon, "dagger");
+ EVTAG_ASSIGN(kill_reply, action, "wave around like an idiot");
+
+ /* no reply to the RPC */
+ EVRPC_REQUEST_DONE(rpc);
+}
+
+static EVRPC_STRUCT(NeverReply) *saved_rpc;
+
+static void
+NeverReplyCb(EVRPC_STRUCT(NeverReply)* rpc, void *arg)
+{
+ test_ok += 1;
+ saved_rpc = rpc;
+}
+
+static void
+rpc_setup(struct evhttp **phttp, short *pport, struct evrpc_base **pbase)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+
+ http = http_setup(&port);
+ base = evrpc_init(http);
+
+ EVRPC_REGISTER(base, Message, msg, kill, MessageCb, NULL);
+ EVRPC_REGISTER(base, NeverReply, msg, kill, NeverReplyCb, NULL);
+
+ *phttp = http;
+ *pport = port;
+ *pbase = base;
+
+ need_input_hook = 0;
+ need_output_hook = 0;
+}
+
+static void
+rpc_teardown(struct evrpc_base *base)
+{
+ assert(EVRPC_UNREGISTER(base, Message) == 0);
+ assert(EVRPC_UNREGISTER(base, NeverReply) == 0);
+
+ evrpc_free(base);
+}
+
+static void
+rpc_postrequest_failure(struct evhttp_request *req, void *arg)
+{
+ if (req->response_code != HTTP_SERVUNAVAIL) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+/*
+ * Test a malformed payload submitted as an RPC
+ */
+
+static void
+rpc_basic_test(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ fprintf(stdout, "Testing Basic RPC Support: ");
+
+ rpc_setup(&http, &port, &base);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(rpc_postrequest_failure, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+ evbuffer_add_printf(req->output_buffer, "Some Nonsense");
+
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_POST,
+ "/.rpc.Message") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+
+ rpc_teardown(base);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ evhttp_free(http);
+}
+
+static void
+rpc_postrequest_done(struct evhttp_request *req, void *arg)
+{
+ struct kill* kill_reply = NULL;
+
+ if (req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ kill_reply = kill_new();
+
+ if ((kill_unmarshal(kill_reply, req->input_buffer)) == -1) {
+ fprintf(stderr, "FAILED (unmarshal)\n");
+ exit(1);
+ }
+
+ kill_free(kill_reply);
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+rpc_basic_message(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+ struct msg *msg;
+
+ fprintf(stdout, "Testing Good RPC Post: ");
+
+ rpc_setup(&http, &port, &base);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(rpc_postrequest_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+ msg_marshal(req->output_buffer, msg);
+ msg_free(msg);
+
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_POST,
+ "/.rpc.Message") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+
+ rpc_teardown(base);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ evhttp_free(http);
+}
+
+static struct evrpc_pool *
+rpc_pool_with_connection(short port)
+{
+ struct evhttp_connection *evcon;
+ struct evrpc_pool *pool;
+
+ pool = evrpc_pool_new(NULL);
+ assert(pool != NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ assert(evcon != NULL);
+
+ evrpc_pool_add_connection(pool, evcon);
+
+ return (pool);
+}
+
+static void
+GotKillCb(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ char *weapon;
+ char *action;
+
+ if (need_output_hook) {
+ struct evhttp_request *req = status->http_req;
+ const char *header = evhttp_find_header(
+ req->input_headers, "X-Pool-Hook");
+ assert(strcmp(header, "ran") == 0);
+ }
+
+ if (status->error != EVRPC_STATUS_ERR_NONE)
+ goto done;
+
+ if (EVTAG_GET(kill, weapon, &weapon) == -1) {
+ fprintf(stderr, "get weapon\n");
+ goto done;
+ }
+ if (EVTAG_GET(kill, action, &action) == -1) {
+ fprintf(stderr, "get action\n");
+ goto done;
+ }
+
+ if (strcmp(weapon, "dagger"))
+ goto done;
+
+ if (strcmp(action, "wave around like an idiot"))
+ goto done;
+
+ test_ok += 1;
+
+done:
+ event_loopexit(NULL);
+}
+
+static void
+GotKillCbTwo(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ char *weapon;
+ char *action;
+
+ if (status->error != EVRPC_STATUS_ERR_NONE)
+ goto done;
+
+ if (EVTAG_GET(kill, weapon, &weapon) == -1) {
+ fprintf(stderr, "get weapon\n");
+ goto done;
+ }
+ if (EVTAG_GET(kill, action, &action) == -1) {
+ fprintf(stderr, "get action\n");
+ goto done;
+ }
+
+ if (strcmp(weapon, "dagger"))
+ goto done;
+
+ if (strcmp(action, "wave around like an idiot"))
+ goto done;
+
+ test_ok += 1;
+
+done:
+ if (test_ok == 2)
+ event_loopexit(NULL);
+}
+
+static int
+rpc_hook_add_header(struct evhttp_request *req,
+ struct evbuffer *evbuf, void *arg)
+{
+ const char *hook_type = arg;
+ if (strcmp("input", hook_type) == 0)
+ evhttp_add_header(req->input_headers, "X-Hook", hook_type);
+ else
+ evhttp_add_header(req->output_headers, "X-Hook", hook_type);
+ return (0);
+}
+
+static int
+rpc_hook_remove_header(struct evhttp_request *req,
+ struct evbuffer *evbuf, void *arg)
+{
+ const char *header = evhttp_find_header(req->input_headers, "X-Hook");
+ assert(header != NULL);
+ assert(strcmp(header, arg) == 0);
+ evhttp_remove_header(req->input_headers, "X-Hook");
+ evhttp_add_header(req->input_headers, "X-Pool-Hook", "ran");
+
+ return (0);
+}
+
+static void
+rpc_basic_client(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill;
+
+ fprintf(stdout, "Testing RPC Client: ");
+
+ rpc_setup(&http, &port, &base);
+
+ need_input_hook = 1;
+ need_output_hook = 1;
+
+ assert(evrpc_add_hook(base, EVRPC_INPUT, rpc_hook_add_header, (void*)"input")
+ != NULL);
+ assert(evrpc_add_hook(base, EVRPC_OUTPUT, rpc_hook_add_header, (void*)"output")
+ != NULL);
+
+ pool = rpc_pool_with_connection(port);
+
+ assert(evrpc_add_hook(pool, EVRPC_INPUT, rpc_hook_remove_header, (void*)"output"));
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill = kill_new();
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill, GotKillCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ /* we do it twice to make sure that reuse works correctly */
+ kill_clear(kill);
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill, GotKillCb, NULL);
+
+ event_dispatch();
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (2)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+/*
+ * We are testing that the second request gets sent over the same
+ * connection after the first RPC completes.
+ */
+static void
+rpc_basic_queued_client(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill_one, *kill_two;
+
+ fprintf(stdout, "Testing RPC (Queued) Client: ");
+
+ rpc_setup(&http, &port, &base);
+
+ pool = rpc_pool_with_connection(port);
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill_one = kill_new();
+ kill_two = kill_new();
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill_one, GotKillCbTwo, NULL);
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill_two, GotKillCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill_one);
+ kill_free(kill_two);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+static void
+GotErrorCb(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ if (status->error != EVRPC_STATUS_ERR_TIMEOUT)
+ goto done;
+
+ /* should never be complete but just to check */
+ if (kill_complete(kill) == 0)
+ goto done;
+
+ test_ok += 1;
+
+done:
+ event_loopexit(NULL);
+}
+
+static void
+rpc_client_timeout(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill;
+
+ fprintf(stdout, "Testing RPC Client Timeout: ");
+
+ rpc_setup(&http, &port, &base);
+
+ pool = rpc_pool_with_connection(port);
+
+ /* set the timeout to 5 seconds */
+ evrpc_pool_set_timeout(pool, 5);
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill = kill_new();
+
+ EVRPC_MAKE_REQUEST(NeverReply, pool, msg, kill, GotErrorCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ /* free the saved RPC structure up */
+ EVRPC_REQUEST_DONE(saved_rpc);
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+void
+rpc_suite(void)
+{
+ rpc_basic_test();
+ rpc_basic_message();
+ rpc_basic_client();
+ rpc_basic_queued_client();
+ rpc_client_timeout();
+}
diff --git a/libevent/test/test-eof.c b/libevent/test/test-eof.c
new file mode 100644
index 00000000000..4fc1a19f224
--- /dev/null
+++ b/libevent/test/test-eof.c
@@ -0,0 +1,82 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+int test_okay = 1;
+int called = 0;
+
+static void
+read_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Read callback for the EOF test.  arg is the struct event
+	 * itself; the event is not persistent, so it must be re-added
+	 * to receive further callbacks.  A read length of 0 means the
+	 * peer shut down its end (EOF).
+	 */
+	char buf[256];
+	int len;
+
+	len = read(fd, buf, sizeof(buf));
+
+	printf("%s: read %d%s\n", __func__,
+	    len, len ? "" : " - means EOF");
+
+	/*
+	 * NOTE(review): a read error (len < 0) takes the same branch
+	 * as data (len > 0) here - confirm that is intended.
+	 */
+	if (len) {
+		if (!called)
+			event_add(arg, NULL);
+	} else if (called == 1)
+		test_okay = 0;	/* EOF must arrive on the second callback */
+
+	called++;
+}
+
+#ifndef SHUT_WR
+#define SHUT_WR 1
+#endif
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-eof: write a string into one end of a socketpair, shut
+	 * down the writing side, and verify that read_cb observes the
+	 * data first and then EOF.  Exit status 0 means success
+	 * (read_cb clears test_okay when EOF is seen on the second
+	 * callback).
+	 */
+	struct event ev;
+	const char *test = "test string";
+	int pair[2];
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1)
+		return (1);
+
+	/* NOTE(review): the write() return value is ignored here */
+	write(pair[0], test, strlen(test)+1);
+	shutdown(pair[0], SHUT_WR);
+
+	/* Initialize the event library */
+	event_init();
+
+	/* Initialize one event */
+	event_set(&ev, pair[1], EV_READ, read_cb, &ev);
+
+	event_add(&ev, NULL);
+
+	event_dispatch();
+
+	return (test_okay);
+}
+
diff --git a/libevent/test/test-init.c b/libevent/test/test-init.c
new file mode 100644
index 00000000000..c368715fd67
--- /dev/null
+++ b/libevent/test/test-init.c
@@ -0,0 +1,33 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+
+int
+main(int argc, char **argv)
+{
+	/*
+	 * test-init: smoke test that simply checks event_init() can be
+	 * called without crashing.
+	 */
+	/* Initialize the event library */
+	event_init();
+
+	return (0);
+}
+
diff --git a/libevent/test/test-time.c b/libevent/test/test-time.c
new file mode 100644
index 00000000000..a847d55ef38
--- /dev/null
+++ b/libevent/test/test-time.c
@@ -0,0 +1,82 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+
+int called = 0;
+
+#define NEVENT 20000
+
+struct event *ev[NEVENT];
+
+/* Return a pseudo-random integer in [0, n). */
+static int
+rand_int(int n)
+{
+#ifdef WIN32
+	/*
+	 * NOTE(review): rand() * n overflows and does not reduce into
+	 * [0, n); (rand() % n) was probably intended - confirm.
+	 */
+	return (int)(rand() * n);
+#else
+	return (int)(random() % n);
+#endif
+}
+
+static void
+time_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Timer callback: until 10*NEVENT callbacks have fired, keep
+	 * the timer population churning by re-adding or deleting ten
+	 * randomly chosen timers with small random timeouts.
+	 */
+	struct timeval tv;
+	int i, j;
+
+	called++;
+
+	if (called < 10*NEVENT) {
+		for (i = 0; i < 10; i++) {
+			j = rand_int(NEVENT);
+			tv.tv_sec = 0;
+			tv.tv_usec = rand_int(50000);
+			/* roughly half the picks re-arm, the rest cancel */
+			if (tv.tv_usec % 2)
+				evtimer_add(ev[j], &tv);
+			else
+				evtimer_del(ev[j]);
+		}
+	}
+}
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-time: stress the timer queue with NEVENT timers that
+	 * constantly re-add/delete one another (see time_cb).  Exit
+	 * status 0 means at least NEVENT callbacks ran.
+	 */
+	struct timeval tv;
+	int i;
+
+	/* Initialize the event library */
+	event_init();
+
+	for (i = 0; i < NEVENT; i++) {
+		/* NOTE(review): malloc() result is not checked */
+		ev[i] = malloc(sizeof(struct event));
+
+		/* Initialize one event */
+		evtimer_set(ev[i], time_cb, ev[i]);
+		tv.tv_sec = 0;
+		tv.tv_usec = rand_int(50000);
+		evtimer_add(ev[i], &tv);
+	}
+
+	event_dispatch();
+
+	return (called < NEVENT);
+}
+
diff --git a/libevent/test/test-weof.c b/libevent/test/test-weof.c
new file mode 100644
index 00000000000..5d87ceb8eb7
--- /dev/null
+++ b/libevent/test/test-weof.c
@@ -0,0 +1,80 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+int pair[2];
+int test_okay = 1;
+int called = 0;
+
+static void
+write_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Write callback for the write-EOF test.  After the first
+	 * successful write the read end (pair[0]) is closed, so a
+	 * later write must fail; the second callback is expected to
+	 * observe that failure.
+	 */
+	const char *test = "test string";
+	int len;
+
+	len = write(fd, test, strlen(test) + 1);
+
+	printf("%s: write %d%s\n", __func__,
+	    len, len ? "" : " - means EOF");
+
+	if (len > 0) {
+		if (!called)
+			event_add(arg, NULL);
+		close(pair[0]);
+	} else if (called == 1)
+		test_okay = 0;	/* failed write on second callback = pass */
+
+	called++;
+}
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-weof: write to a socketpair whose read end is closed
+	 * after the first write, and verify the second write fails.
+	 * SIGPIPE is ignored so the failed write returns an error
+	 * instead of killing the process.  Exit status 0 = success.
+	 */
+	struct event ev;
+
+#ifndef WIN32
+	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
+		return (1);
+#endif
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1)
+		return (1);
+
+	/* Initialize the event library */
+	event_init();
+
+	/* Initialize one event */
+	event_set(&ev, pair[1], EV_WRITE, write_cb, &ev);
+
+	event_add(&ev, NULL);
+
+	event_dispatch();
+
+	return (test_okay);
+}
+
diff --git a/libevent/test/test.sh b/libevent/test/test.sh
new file mode 100644
index 00000000000..506a1988c34
--- /dev/null
+++ b/libevent/test/test.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+
+# Disable every event backend; the per-backend runs below re-enable
+# exactly one by unsetting its EVENT_NO* variable.
+setup () {
+	EVENT_NOKQUEUE=yes; export EVENT_NOKQUEUE
+	EVENT_NODEVPOLL=yes; export EVENT_NODEVPOLL
+	EVENT_NOPOLL=yes; export EVENT_NOPOLL
+	EVENT_NOSELECT=yes; export EVENT_NOSELECT
+	EVENT_NOEPOLL=yes; export EVENT_NOEPOLL
+	EVENT_NOEVPORT=yes; export EVENT_NOEVPORT
+}
+
+# Run the test binaries with the currently enabled backend, printing
+# OKAY/FAILED per binary.  If test-init fails, the backend is assumed
+# unavailable and the whole run is skipped.
+# NOTE(review): this redefines the shell builtin/command `test'; any
+# later use of `test' or `[' in this script would invoke this function
+# instead - confirm that is intended.
+test () {
+	if ./test-init 2>/dev/null ;
+	then
+		true
+	else
+		echo Skipping test
+		return
+	fi
+
+echo -n " test-eof: "
+if ./test-eof >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " test-weof: "
+if ./test-weof >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " test-time: "
+if ./test-time >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " regress: "
+if ./regress >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+}
+
+echo "Running tests:"
+
+# Need to do this by hand?
+# Each stanza below disables all backends via setup, then re-enables
+# exactly one by unsetting its EVENT_NO* variable, and runs the suite
+# against that backend.
+setup
+unset EVENT_NOKQUEUE
+export EVENT_NOKQUEUE
+echo "KQUEUE"
+test
+
+setup
+unset EVENT_NODEVPOLL
+export EVENT_NODEVPOLL
+echo "DEVPOLL"
+test
+
+setup
+unset EVENT_NOPOLL
+export EVENT_NOPOLL
+echo "POLL"
+test
+
+setup
+unset EVENT_NOSELECT
+export EVENT_NOSELECT
+echo "SELECT"
+test
+
+setup
+unset EVENT_NOEPOLL
+export EVENT_NOEPOLL
+echo "EPOLL"
+test
+
+setup
+unset EVENT_NOEVPORT
+export EVENT_NOEVPORT
+echo "EVPORT"
+test
+
+
+
diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt
index 9996b167323..e178d546590 100644
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@@ -324,7 +324,8 @@ SET(CLIENT_SOURCES
../sql-common/client.c
../sql-common/mysql_async.c
../sql-common/my_time.c
- ../sql-common/client_plugin.c
+ ../sql-common/client_plugin.c
+ ../sql-common/client_authentication.cc
../sql/net_serv.cc
../sql-common/pack.c
../sql/password.c
@@ -334,7 +335,7 @@ ADD_CONVENIENCE_LIBRARY(clientlib ${CLIENT_SOURCES})
DTRACE_INSTRUMENT(clientlib)
ADD_DEPENDENCIES(clientlib GenError)
-SET(LIBS clientlib dbug strings vio mysys ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBDL})
+SET(LIBS clientlib dbug strings vio mysys mysys_ssl ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBDL})
# Merge several convenience libraries into one big mysqlclient
# and link them together into shared library.
diff --git a/libmysql/errmsg.c b/libmysql/errmsg.c
index 4c4485f7ec4..9985fa2233c 100644
--- a/libmysql/errmsg.c
+++ b/libmysql/errmsg.c
@@ -85,6 +85,8 @@ const char *client_errors[]=
"The number of columns in the result set differs from the number of bound buffers. You must reset the statement, rebind the result set columns, and execute the statement again",
"This handle is already connected. Use a separate handle for each connection.",
"Authentication plugin '%s' cannot be loaded: %s",
+ "There is an attribute with the same name already",
+ "Authentication plugin '%s' reported error: %s",
""
};
diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c
index 69fce429ab9..251c8f29b70 100644
--- a/libmysql/libmysql.c
+++ b/libmysql/libmysql.c
@@ -1139,7 +1139,7 @@ void my_net_local_init(NET *net)
my_net_set_read_timeout(net, CLIENT_NET_READ_TIMEOUT);
my_net_set_write_timeout(net, CLIENT_NET_WRITE_TIMEOUT);
net->retry_count= 1;
- net->max_packet_size= max(net_buffer_length, max_allowed_packet);
+ net->max_packet_size= MY_MAX(net_buffer_length, max_allowed_packet);
}
/*
@@ -3228,7 +3228,7 @@ static void fetch_string_with_conversion(MYSQL_BIND *param, char *value,
copy_length= end - start;
/* We've got some data beyond offset: copy up to buffer_length bytes */
if (param->buffer_length)
- memcpy(buffer, start, min(copy_length, param->buffer_length));
+ memcpy(buffer, start, MY_MIN(copy_length, param->buffer_length));
}
else
copy_length= 0;
@@ -3455,7 +3455,7 @@ static void fetch_float_with_conversion(MYSQL_BIND *param, MYSQL_FIELD *field,
size_t len;
if (field->decimals >= NOT_FIXED_DEC)
len= my_gcvt(value, type,
- (int) min(sizeof(buff)-1, param->buffer_length),
+ (int) MY_MIN(sizeof(buff)-1, param->buffer_length),
buff, NULL);
else
len= my_fcvt(value, (int) field->decimals, buff, NULL);
@@ -3765,7 +3765,7 @@ static void fetch_result_bin(MYSQL_BIND *param,
uchar **row)
{
ulong length= net_field_length(row);
- ulong copy_length= min(length, param->buffer_length);
+ ulong copy_length= MY_MIN(length, param->buffer_length);
memcpy(param->buffer, (char *)*row, copy_length);
*param->length= length;
*param->error= copy_length < length;
@@ -3777,7 +3777,7 @@ static void fetch_result_str(MYSQL_BIND *param,
uchar **row)
{
ulong length= net_field_length(row);
- ulong copy_length= min(length, param->buffer_length);
+ ulong copy_length= MY_MIN(length, param->buffer_length);
memcpy(param->buffer, (char *)*row, copy_length);
/* Add an end null if there is room in the buffer */
if (copy_length != param->buffer_length)
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 05606942d8e..a1aa5e38580 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -15,15 +15,14 @@
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
-SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c
+SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c
errors.c hash.c list.c
- md5.c md5_compute.cc
mf_cache.c mf_dirname.c mf_fn_ext.c
mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c
mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c
mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_arr_appstr.c mf_tempdir.c
mf_tempfile.c mf_unixpath.c mf_wcomp.c mulalloc.c my_access.c
- my_aes.c my_alloc.c my_bit.c my_bitmap.c my_chsize.c
+ my_alloc.c my_bit.c my_bitmap.c my_chsize.c
my_compress.c my_copy.c my_create.c my_delete.c
my_div.c my_error.c my_file.c my_fopen.c my_fstream.c
my_gethwaddr.c my_getopt.c my_getsystime.c my_getwd.c my_compare.c my_init.c
@@ -33,7 +32,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c
my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c
my_basename.c
my_write.c ptr_cmp.c queues.c stacktrace.c
- rijndael.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c
+ string.c thr_alarm.c thr_lock.c thr_mutex.c
thr_rwlock.c tree.c typelib.c base64.c my_memmem.c my_getpagesize.c
lf_alloc-pin.c lf_dynarray.c lf_hash.c
safemalloc.c my_new.cc
diff --git a/mysys/array.c b/mysys/array.c
index 60f2202f5b3..cf377f77676 100644
--- a/mysys/array.c
+++ b/mysys/array.c
@@ -48,7 +48,7 @@ my_bool my_init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
DBUG_ENTER("my_init_dynamic_array2");
if (!alloc_increment)
{
- alloc_increment=max((8192-MALLOC_OVERHEAD)/element_size,16);
+ alloc_increment=MY_MAX((8192-MALLOC_OVERHEAD)/element_size,16);
if (init_alloc > 8 && alloc_increment > init_alloc * 2)
alloc_increment=init_alloc*2;
}
@@ -333,7 +333,7 @@ void delete_dynamic_element(DYNAMIC_ARRAY *array, uint idx)
void freeze_size(DYNAMIC_ARRAY *array)
{
- uint elements=max(array->elements,1);
+ uint elements=MY_MAX(array->elements,1);
/*
Do nothing if we are using a static buffer
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index 6ab6ba3aae0..788537ec87b 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -287,7 +287,7 @@ struct st_harvester {
static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
{
int i;
- LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
+ LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
for (; el < el_end; el++)
{
for (i= 0; i < LF_PINBOX_PINS; i++)
diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c
index 3d072fd063e..16a77c0fa1a 100644
--- a/mysys/lf_dynarray.c
+++ b/mysys/lf_dynarray.c
@@ -124,7 +124,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
{
uchar *alloc, *data;
alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
- max(array->size_of_element, sizeof(void *)),
+ MY_MAX(array->size_of_element, sizeof(void *)),
MYF(MY_WME|MY_ZEROFILL));
if (unlikely(!alloc))
return(NULL);
diff --git a/mysys/ma_dyncol.c b/mysys/ma_dyncol.c
index 8a224f1c5e8..33627b85f92 100644
--- a/mysys/ma_dyncol.c
+++ b/mysys/ma_dyncol.c
@@ -3853,20 +3853,19 @@ mariadb_dyncol_val_str(DYNAMIC_STRING *str, DYNAMIC_COLUMN_VALUE *val,
if (!quote)
{
/* convert to the destination */
- str->length+= copy_and_convert_extended(str->str, bufflen,
- cs,
- from, (uint32)len,
- val->x.string.charset,
- &dummy_errors);
+ str->length+= my_convert(str->str, bufflen,
+ cs,
+ from, (uint32)len,
+ val->x.string.charset,
+ &dummy_errors);
return ER_DYNCOL_OK;
}
if ((alloc= (char *)my_malloc(bufflen, MYF(0))))
{
- len=
- copy_and_convert_extended(alloc, bufflen, cs,
- from, (uint32)len,
- val->x.string.charset,
- &dummy_errors);
+ len= my_convert(alloc, bufflen, cs,
+ from, (uint32)len,
+ val->x.string.charset,
+ &dummy_errors);
from= alloc;
}
else
diff --git a/mysys/mf_dirname.c b/mysys/mf_dirname.c
index 569293f5401..bc827f60d44 100644
--- a/mysys/mf_dirname.c
+++ b/mysys/mf_dirname.c
@@ -78,7 +78,7 @@ size_t dirname_part(char *to, const char *name, size_t *to_res_length)
SYNPOSIS
convert_dirname()
to Store result here. Must be at least of size
- min(FN_REFLEN, strlen(from) + 1) to make room
+ MY_MIN(FN_REFLEN, strlen(from) + 1) to make room
for adding FN_LIBCHAR at the end.
from Original filename. May be == to
from_end Pointer at end of filename (normally end \0)
diff --git a/mysys/mf_format.c b/mysys/mf_format.c
index 2b2356c08df..3b5b0aa8a4a 100644
--- a/mysys/mf_format.c
+++ b/mysys/mf_format.c
@@ -85,7 +85,7 @@ char * fn_format(char * to, const char *name, const char *dir,
tmp_length= strlength(startpos);
DBUG_PRINT("error",("dev: '%s' ext: '%s' length: %u",dev,ext,
(uint) length));
- (void) strmake(to,startpos,min(tmp_length,FN_REFLEN-1));
+ (void) strmake(to,startpos,MY_MIN(tmp_length,FN_REFLEN-1));
}
else
{
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 02e5c5373ae..3fa6ec28f7d 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -1127,7 +1127,7 @@ static void copy_to_read_buffer(IO_CACHE *write_cache,
*/
while (write_length)
{
- size_t copy_length= min(write_length, write_cache->buffer_length);
+ size_t copy_length= MY_MIN(write_length, write_cache->buffer_length);
int __attribute__((unused)) rc;
rc= lock_io_cache(write_cache, write_cache->pos_in_file);
@@ -1285,7 +1285,7 @@ read_append_buffer:
TODO: figure out if the assert below is needed or correct.
*/
DBUG_ASSERT(pos_in_file == info->end_of_file);
- copy_len=min(Count, len_in_buff);
+ copy_len=MY_MIN(Count, len_in_buff);
memcpy(Buffer, info->append_read_pos, copy_len);
info->append_read_pos += copy_len;
Count -= copy_len;
@@ -1394,7 +1394,7 @@ int _my_b_async_read(register IO_CACHE *info, uchar *Buffer, size_t Count)
}
#endif
/* Copy found bytes to buffer */
- length=min(Count,read_length);
+ length=MY_MIN(Count,read_length);
memcpy(Buffer,info->read_pos,(size_t) length);
Buffer+=length;
Count-=length;
@@ -1428,7 +1428,7 @@ int _my_b_async_read(register IO_CACHE *info, uchar *Buffer, size_t Count)
if ((read_length=mysql_file_read(info->file,info->request_pos,
read_length, info->myflags)) == (size_t) -1)
return info->error= -1;
- use_length=min(Count,read_length);
+ use_length=MY_MIN(Count,read_length);
memcpy(Buffer,info->request_pos,(size_t) use_length);
info->read_pos=info->request_pos+Count;
info->read_end=info->request_pos+read_length;
diff --git a/mysys/my_aes.c b/mysys/my_aes.c
deleted file mode 100644
index 575d4702dee..00000000000
--- a/mysys/my_aes.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Copyright (C) 2002 MySQL AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-
-/*
- Implementation of AES Encryption for MySQL
- Initial version by Peter Zaitsev June 2002
-*/
-
-
-#include <my_global.h>
-#include <m_string.h>
-#include "my_aes.h"
-
-enum encrypt_dir { AES_ENCRYPT, AES_DECRYPT };
-
-#define AES_BLOCK_SIZE 16 /* Block size in bytes */
-
-#define AES_BAD_DATA -1 /* If bad data discovered during decoding */
-
-
-/* The structure for key information */
-typedef struct {
- int nr; /* Number of rounds */
- uint32 rk[4*(AES_MAXNR + 1)]; /* key schedule */
-} KEYINSTANCE;
-
-
-/*
- This is internal function just keeps joint code of Key generation
-
- SYNOPSIS
- my_aes_create_key()
- aes_key Address of Key Instance to be created
- direction Direction (are we encoding or decoding)
- key Key to use for real key creation
- key_length Length of the key
-
- DESCRIPTION
-
- RESULT
- 0 ok
- -1 Error Note: The current impementation never returns this
-*/
-
-static int my_aes_create_key(KEYINSTANCE *aes_key,
- enum encrypt_dir direction, const char *key,
- int key_length)
-{
- uint8 rkey[AES_KEY_LENGTH/8]; /* The real key to be used for encryption */
- uint8 *rkey_end=rkey+AES_KEY_LENGTH/8; /* Real key boundary */
- uint8 *ptr; /* Start of the real key*/
- const char *sptr; /* Start of the working key */
- const char *key_end=key+key_length; /* Working key boundary*/
-
- bzero((char*) rkey,AES_KEY_LENGTH/8); /* Set initial key */
-
- for (ptr= rkey, sptr= key; sptr < key_end; ptr++,sptr++)
- {
- if (ptr == rkey_end)
- ptr= rkey; /* Just loop over tmp_key until we used all key */
- *ptr^= (uint8) *sptr;
- }
-#ifdef AES_USE_KEY_BITS
- /*
- This block is intended to allow more weak encryption if application
- build with libmysqld needs to correspond to export regulations
- It should be never used in normal distribution as does not give
- any speed improvement.
- To get worse security define AES_USE_KEY_BITS to number of bits
- you want key to be. It should be divisible by 8
-
- WARNING: Changing this value results in changing of enryption for
- all key lengths so altering this value will result in impossibility
- to decrypt data encrypted with previous value
- */
-#define AES_USE_KEY_BYTES (AES_USE_KEY_BITS/8)
- /*
- To get weaker key we use first AES_USE_KEY_BYTES bytes of created key
- and cyclically copy them until we created all required key length
- */
- for (ptr= rkey+AES_USE_KEY_BYTES, sptr=rkey ; ptr < rkey_end;
- ptr++,sptr++)
- {
- if (sptr == rkey+AES_USE_KEY_BYTES)
- sptr=rkey;
- *ptr=*sptr;
- }
-#endif
- if (direction == AES_DECRYPT)
- aes_key->nr = rijndaelKeySetupDec(aes_key->rk, rkey, AES_KEY_LENGTH);
- else
- aes_key->nr = rijndaelKeySetupEnc(aes_key->rk, rkey, AES_KEY_LENGTH);
- return 0;
-}
-
-
-/*
- Crypt buffer with AES encryption algorithm.
-
- SYNOPSIS
- my_aes_encrypt()
- source Pointer to data for encryption
- source_length Size of encryption data
- dest Buffer to place encrypted data (must be large enough)
- key Key to be used for encryption
- key_length Length of the key. Will handle keys of any length
-
- RETURN
- >= 0 Size of encrypted data
- < 0 Error
-*/
-
-int my_aes_encrypt(const char* source, int source_length, char* dest,
- const char* key, int key_length)
-{
- KEYINSTANCE aes_key;
- uint8 block[AES_BLOCK_SIZE]; /* 128 bit block used for padding */
- int rc; /* result codes */
- int num_blocks; /* number of complete blocks */
- char pad_len; /* pad size for the last block */
- int i;
-
- if ((rc= my_aes_create_key(&aes_key,AES_ENCRYPT,key,key_length)))
- return rc;
-
- num_blocks = source_length/AES_BLOCK_SIZE;
-
- for (i = num_blocks; i > 0; i--) /* Encode complete blocks */
- {
- rijndaelEncrypt(aes_key.rk, aes_key.nr, (const uint8*) source,
- (uint8*) dest);
- source+= AES_BLOCK_SIZE;
- dest+= AES_BLOCK_SIZE;
- }
-
- /* Encode the rest. We always have incomplete block */
- pad_len = AES_BLOCK_SIZE - (source_length - AES_BLOCK_SIZE*num_blocks);
- memcpy(block, source, 16 - pad_len);
- bfill(block + AES_BLOCK_SIZE - pad_len, pad_len, pad_len);
- rijndaelEncrypt(aes_key.rk, aes_key.nr, block, (uint8*) dest);
- return AES_BLOCK_SIZE*(num_blocks + 1);
-}
-
-
-/*
- DeCrypt buffer with AES encryption algorithm.
-
- SYNOPSIS
- my_aes_decrypt()
- source Pointer to data for decryption
- source_length Size of encrypted data
- dest Buffer to place decrypted data (must be large enough)
- key Key to be used for decryption
- key_length Length of the key. Will handle keys of any length
-
- RETURN
- >= 0 Size of encrypted data
- < 0 Error
-*/
-
-int my_aes_decrypt(const char *source, int source_length, char *dest,
- const char *key, int key_length)
-{
- KEYINSTANCE aes_key;
- uint8 block[AES_BLOCK_SIZE]; /* 128 bit block used for padding */
- int rc; /* Result codes */
- int num_blocks; /* Number of complete blocks */
- uint pad_len; /* Pad size for the last block */
- int i;
-
- if ((rc=my_aes_create_key(&aes_key,AES_DECRYPT,key,key_length)))
- return rc;
-
- num_blocks = source_length/AES_BLOCK_SIZE;
-
- if ((source_length != num_blocks*AES_BLOCK_SIZE) || num_blocks ==0 )
- return AES_BAD_DATA; /* Input size has to be even and at least one block */
-
- for (i = num_blocks-1; i > 0; i--) /* Decode all but last blocks */
- {
- rijndaelDecrypt(aes_key.rk, aes_key.nr, (const uint8*) source,
- (uint8*) dest);
- source+= AES_BLOCK_SIZE;
- dest+= AES_BLOCK_SIZE;
- }
-
- rijndaelDecrypt(aes_key.rk, aes_key.nr, (const uint8*) source, block);
- /* Use last char in the block as size */
- pad_len = (uint) (uchar) block[AES_BLOCK_SIZE-1];
-
- if (pad_len > AES_BLOCK_SIZE)
- return AES_BAD_DATA;
- /* We could also check whole padding but we do not really need this */
-
- memcpy(dest, block, AES_BLOCK_SIZE - pad_len);
- return AES_BLOCK_SIZE*num_blocks - pad_len;
-}
-
-
-/*
- Get size of buffer which will be large enough for encrypted data
-
- SYNOPSIS
- my_aes_get_size()
- source_length Length of data to be encrypted
-
- RETURN
- Size of buffer required to store encrypted data
-*/
-
-int my_aes_get_size(int source_length)
-{
- return AES_BLOCK_SIZE*(source_length/AES_BLOCK_SIZE)+AES_BLOCK_SIZE;
-}
diff --git a/mysys/my_alloc.c b/mysys/my_alloc.c
index 6c8a73df4a7..d61c7e171d0 100644
--- a/mysys/my_alloc.c
+++ b/mysys/my_alloc.c
@@ -228,7 +228,7 @@ void *alloc_root(MEM_ROOT *mem_root, size_t length)
{ /* Time to alloc new block */
block_size= (mem_root->block_size & ~1) * (mem_root->block_num >> 2);
get_size= length+ALIGN_SIZE(sizeof(USED_MEM));
- get_size= max(get_size, block_size);
+ get_size= MY_MAX(get_size, block_size);
if (!(next = (USED_MEM*) my_malloc(get_size,
MYF(MY_WME | ME_FATALERROR |
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 83d03177eba..8b4dd83ab21 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -405,7 +405,7 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
DBUG_ASSERT(map->bitmap && map2->bitmap);
- end= to+min(len,len2);
+ end= to+MY_MIN(len,len2);
while (to < end)
*to++ &= *from++;
diff --git a/mysys/my_compare.c b/mysys/my_compare.c
index 9e192e52fb7..670d377d4a4 100644
--- a/mysys/my_compare.c
+++ b/mysys/my_compare.c
@@ -36,7 +36,7 @@ static int compare_bin(const uchar *a, uint a_length,
const uchar *b, uint b_length,
my_bool part_key, my_bool skip_end_space)
{
- uint length= min(a_length,b_length);
+ uint length= MY_MIN(a_length,b_length);
const uchar *end= a+ length;
int flag;
@@ -171,7 +171,7 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
continue; /* To next key part */
}
}
- end= a+ min(keyseg->length,key_length);
+ end= a+ MY_MIN(keyseg->length,key_length);
next_key_length=key_length-keyseg->length;
switch ((enum ha_base_keytype) keyseg->type) {
diff --git a/mysys/my_compress.c b/mysys/my_compress.c
index ea56900db05..6b223d2b354 100644
--- a/mysys/my_compress.c
+++ b/mysys/my_compress.c
@@ -311,7 +311,7 @@ int unpackfrm(uchar **unpack_data, size_t *unpack_len,
if (ver != 1)
DBUG_RETURN(1);
- if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME))))
+ if (!(data= my_malloc(MY_MAX(orglen, complen), MYF(MY_WME))))
DBUG_RETURN(2);
memcpy(data, pack_data + BLOB_HEADER, complen);
diff --git a/mysys/my_conio.c b/mysys/my_conio.c
index 5dbd31193a9..67b1a319f57 100644
--- a/mysys/my_conio.c
+++ b/mysys/my_conio.c
@@ -165,13 +165,13 @@ char* my_cgets(char *buffer, size_t clen, size_t* plen)
though it is known it should not be more than 64K
so we cut 64K and try first size of screen buffer
if it is still to large we cut half of it and try again
- later we may want to cycle from min(clen, 65535) to allowed size
+ later we may want to cycle from MY_MIN(clen, 65535) to allowed size
with small decrement to determine exact allowed buffer
*/
- clen= min(clen, 65535);
+ clen= MY_MIN(clen, 65535);
do
{
- clen= min(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
+ clen= MY_MIN(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
if (!ReadConsole((HANDLE)my_coninpfh, (LPVOID)buffer, (DWORD) clen - 1, &plen_res,
NULL))
{
diff --git a/mysys/default.c b/mysys/my_default.c
index a90f428eca5..f03a22b598e 100644
--- a/mysys/default.c
+++ b/mysys/my_default.c
@@ -34,8 +34,9 @@
****************************************************************************/
#include "mysys_priv.h"
-#include "m_string.h"
-#include "m_ctype.h"
+#include <my_default.h>
+#include <m_string.h>
+#include <m_ctype.h>
#include <my_dir.h>
#ifdef __WIN__
#include <winbase.h>
@@ -899,7 +900,7 @@ static int search_default_file_with_ext(Process_option_func opt_handler,
for ( ; my_isspace(&my_charset_latin1,end[-1]) ; end--) ;
end[0]=0;
- strmake(curr_gr, ptr, min((size_t) (end-ptr)+1, sizeof(curr_gr)-1));
+ strmake(curr_gr, ptr, MY_MIN((size_t) (end-ptr)+1, sizeof(curr_gr)-1));
/* signal that a new group is found */
opt_handler(handler_ctx, curr_gr, NULL);
diff --git a/mysys/my_error.c b/mysys/my_error.c
index 08c67412fe1..1200385a43d 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -48,44 +48,73 @@
*/
static struct my_err_head
{
- struct my_err_head *meh_next; /* chain link */
- const char** (*get_errmsgs) (); /* returns error message format */
- int meh_first; /* error number matching array slot 0 */
- int meh_last; /* error number matching last slot */
-} my_errmsgs_globerrs = {NULL, get_global_errmsgs, EE_ERROR_FIRST, EE_ERROR_LAST};
+ struct my_err_head *meh_next; /* chain link */
+ const char** (*get_errmsgs)(); /* returns error message format */
+ uint meh_first; /* error number matching array slot 0 */
+ uint meh_last; /* error number matching last slot */
+} my_errmsgs_globerrs=
+{NULL, get_global_errmsgs, EE_ERROR_FIRST, EE_ERROR_LAST};
static struct my_err_head *my_errmsgs_list= &my_errmsgs_globerrs;
-/*
- Error message to user
+/**
+ @brief Get an error format string from one of the my_error_register()ed sets
+
+ @note
+ NULL values are possible even within a registered range.
- SYNOPSIS
- my_error()
- nr Errno
- MyFlags Flags
- ... variable list
+ @param nr Errno
+ @retval NULL if no message is registered for this error number
+ @retval str C-string
*/
-void my_error(int nr, myf MyFlags, ...)
+const char *my_get_err_msg(uint nr)
{
const char *format;
struct my_err_head *meh_p;
- va_list args;
- char ebuff[ERRMSGSIZE];
- DBUG_ENTER("my_error");
- DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d", nr, MyFlags, errno));
- /* Search for the error messages array, which could contain the message. */
+ /* Search for the range this error is in. */
for (meh_p= my_errmsgs_list; meh_p; meh_p= meh_p->meh_next)
if (nr <= meh_p->meh_last)
break;
- /* get the error message string. Default, if NULL or empty string (""). */
- if (! (format= (meh_p && (nr >= meh_p->meh_first)) ?
- meh_p->get_errmsgs()[nr - meh_p->meh_first] : NULL) || ! *format)
- (void) my_snprintf (ebuff, sizeof(ebuff), "Unknown error %d", nr);
+ /*
+ If we found the range this error number is in, get the format string.
+ If the string is empty, or a NULL pointer, or if we're out of return,
+ we return NULL.
+ */
+ if (!(format= (meh_p && (nr >= meh_p->meh_first)) ?
+ meh_p->get_errmsgs()[nr - meh_p->meh_first] : NULL) ||
+ !*format)
+ return NULL;
+
+ return format;
+}
+
+
+/**
+ Fill in and print a previously registered error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param nr error number
+ @param MyFlags Flags
+ @param ... variable list matching that error format string
+*/
+
+void my_error(uint nr, myf MyFlags, ...)
+{
+ const char *format;
+ va_list args;
+ char ebuff[ERRMSGSIZE];
+ DBUG_ENTER("my_error");
+ DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d", nr, MyFlags, errno));
+
+ if (!(format = my_get_err_msg(nr)))
+ (void) my_snprintf(ebuff, sizeof(ebuff), "Unknown error %d", nr);
else
{
va_start(args,MyFlags);
@@ -98,15 +127,16 @@ void my_error(int nr, myf MyFlags, ...)
}
-/*
- Error as printf
-
- SYNOPSIS
- my_printf_error()
- error Errno
- format Format string
- MyFlags Flags
- ... variable list
+/**
+ Print an error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param format format string
+ @param MyFlags Flags
+ @param ... variable list matching that error format string
*/
void my_printf_error(uint error, const char *format, myf MyFlags, ...)
@@ -125,15 +155,16 @@ void my_printf_error(uint error, const char *format, myf MyFlags, ...)
DBUG_VOID_RETURN;
}
-/*
- Error with va_list
-
- SYNOPSIS
- my_printv_error()
- error Errno
- format Format string
- MyFlags Flags
- ... variable list
+/**
+ Print an error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param format format string
+ @param MyFlags Flags
+ @param ap variable list matching that error format string
*/
void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
@@ -149,14 +180,15 @@ void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
}
-/*
- Give message using error_handler_hook
+/**
+ Print an error message.
- SYNOPSIS
- my_message()
- error Errno
- str Error message
- MyFlags Flags
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param str error message
+ @param MyFlags Flags
*/
void my_message(uint error, const char *str, register myf MyFlags)
@@ -165,16 +197,11 @@ void my_message(uint error, const char *str, register myf MyFlags)
}
-/*
+/**
Register error messages for use with my_error().
- SYNOPSIS
- my_error_register()
- errmsgs array of pointers to error messages
- first error number of first message in the array
- last error number of last message in the array
+ @description
- DESCRIPTION
The pointer array is expected to contain addresses to NUL-terminated
C character strings. The array contains (last - first + 1) pointers.
NULL pointers and empty strings ("") are allowed. These will be mapped to
@@ -182,12 +209,15 @@ void my_message(uint error, const char *str, register myf MyFlags)
This function registers the error numbers 'first' to 'last'.
No overlapping with previously registered error numbers is allowed.
- RETURN
- 0 OK
- != 0 Error
+ @param errmsgs array of pointers to error messages
+ @param first error number of first message in the array
+ @param last error number of last message in the array
+
+ @retval 0 OK
+ @retval != 0 Error
*/
-int my_error_register(const char** (*get_errmsgs) (), int first, int last)
+int my_error_register(const char** (*get_errmsgs) (), uint first, uint last)
{
struct my_err_head *meh_p;
struct my_err_head **search_meh_pp;
@@ -223,28 +253,27 @@ int my_error_register(const char** (*get_errmsgs) (), int first, int last)
}
-/*
+/**
Unregister formerly registered error messages.
- SYNOPSIS
- my_error_unregister()
- first error number of first message
- last error number of last message
+ @description
- DESCRIPTION
This function unregisters the error numbers 'first' to 'last'.
These must have been previously registered by my_error_register().
'first' and 'last' must exactly match the registration.
If a matching registration is present, the header is removed from the
list and the pointer to the error messages pointers array is returned.
+ (The messages themselves are not released here as they may be static.)
Otherwise, NULL is returned.
- RETURN
- non-NULL OK, returns address of error messages pointers array.
- NULL Error, no such number range registered.
+ @param first error number of first message
+ @param last error number of last message
+
+ @retval NULL Error, no such number range registered.
+ @retval non-NULL OK, returns address of error messages pointers array.
*/
-const char **my_error_unregister(int first, int last)
+const char **my_error_unregister(uint first, uint last)
{
struct my_err_head *meh_p;
struct my_err_head **search_meh_pp;
@@ -274,6 +303,17 @@ const char **my_error_unregister(int first, int last)
}
+/**
+ Unregister all formerly registered error messages.
+
+ @description
+
+ This function unregisters all error numbers that have previously
+ been registered by my_error_register().
+ All headers are removed from the list; the messages themselves are
+ not released here as they may be static.
+*/
+
void my_error_unregister_all(void)
{
struct my_err_head *cursor, *saved_next;
diff --git a/mysys/my_file.c b/mysys/my_file.c
index 8d01285a94b..a23ab487d00 100644
--- a/mysys/my_file.c
+++ b/mysys/my_file.c
@@ -76,7 +76,7 @@ static uint set_max_open_files(uint max_file_limit)
static uint set_max_open_files(uint max_file_limit)
{
/* We don't know the limit. Return best guess */
- return min(max_file_limit, OS_FILE_LIMIT);
+ return MY_MIN(max_file_limit, OS_FILE_LIMIT);
}
#endif
@@ -99,7 +99,7 @@ uint my_set_max_open_files(uint files)
DBUG_PRINT("enter",("files: %u my_file_limit: %u", files, my_file_limit));
files+= MY_FILE_MIN;
- files= set_max_open_files(min(files, OS_FILE_LIMIT));
+ files= set_max_open_files(MY_MIN(files, OS_FILE_LIMIT));
if (files <= MY_NFILE)
DBUG_RETURN(files);
@@ -109,9 +109,9 @@ uint my_set_max_open_files(uint files)
/* Copy any initialized files */
memcpy((char*) tmp, (char*) my_file_info,
- sizeof(*tmp) * min(my_file_limit, files));
+ sizeof(*tmp) * MY_MIN(my_file_limit, files));
bzero((char*) (tmp + my_file_limit),
- max((int) (files- my_file_limit), 0)*sizeof(*tmp));
+ MY_MAX((int) (files- my_file_limit), 0)*sizeof(*tmp));
my_free_open_file_info(); /* Free if already allocated */
my_file_info= tmp;
my_file_limit= files;
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 7905ad90877..16808fe1986 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -15,6 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
+#include <my_default.h>
#include <m_string.h>
#include <stdlib.h>
#include <my_sys.h>
diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c
index 178bcd9c539..1c8de9dd7a5 100644
--- a/mysys/my_rnd.c
+++ b/mysys/my_rnd.c
@@ -14,6 +14,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "mysys_priv.h"
+#include <my_rnd.h>
#include <m_string.h>
/*
@@ -53,3 +54,39 @@ double my_rnd(struct my_rnd_struct *rand_st)
rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
return (((double) rand_st->seed1)/rand_st->max_value_dbl);
}
+
+
+/**
+ Generate a random number using the OpenSSL/yaSSL supplied
+ random number generator if available.
+
+ @param rand_st [INOUT] Structure used for number generation
+ only if none of the SSL libraries are
+ available.
+
+ @return Generated random number.
+*/
+
+double my_rnd_ssl(struct my_rnd_struct *rand_st)
+{
+
+#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
+ int rc;
+ unsigned int res;
+
+#if defined(HAVE_YASSL)
+ rc= yaSSL::RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#else
+ rc= RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#endif /* HAVE_YASSL */
+
+ if (rc)
+ return (double)res / (double)UINT_MAX;
+#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
+
+ return my_rnd(rand_st);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index 4e5bed84637..5034ff9b35f 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -376,12 +376,16 @@ void my_thread_end(void)
This must be done before trashing st_my_thread_var,
because the LF_HASH depends on it.
*/
- if (PSI_server)
- PSI_server->delete_current_thread();
+ PSI_THREAD_CALL(delete_current_thread)();
#endif
+ /*
+ We need to disable DBUG early for this thread to ensure that
+ the mutex calls don't enable it again.
+ To do this we have to both do DBUG_POP() and also reset THR_KEY_mysys
+ as the key is used by DBUG.
+ */
DBUG_POP();
-
pthread_setspecific(THR_KEY_mysys,0);
if (tmp && tmp->init)
@@ -418,6 +422,10 @@ struct st_my_thread_var *_my_thread_var(void)
return my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys);
}
+int set_mysys_var(struct st_my_thread_var *mysys_var)
+{
+ return my_pthread_setspecific_ptr(THR_KEY_mysys, mysys_var);
+}
/****************************************************************************
Get name of current thread.
diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c
index ab1b259ae0f..569616d09eb 100644
--- a/mysys/my_uuid.c
+++ b/mysys/my_uuid.c
@@ -40,6 +40,7 @@
*/
#include "mysys_priv.h"
+#include <my_rnd.h>
#include <m_string.h>
#include <myisampack.h> /* mi_int2store, mi_int4store */
@@ -151,7 +152,7 @@ void my_uuid(uchar *to)
/*
-1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
*/
- delta= min(nanoseq, (ulong)(tv - uuid_time -1));
+ delta= MY_MIN(nanoseq, (ulong)(tv - uuid_time -1));
tv-= delta;
nanoseq-= delta;
}
diff --git a/mysys/psi_noop.c b/mysys/psi_noop.c
index 78629ca16d7..8c9f2773170 100644
--- a/mysys/psi_noop.c
+++ b/mysys/psi_noop.c
@@ -119,7 +119,8 @@ static void destroy_cond_noop(PSI_cond* cond NNN)
}
static PSI_socket*
-init_socket_noop(PSI_socket_key key NNN, const my_socket *fd NNN)
+init_socket_noop(PSI_socket_key key NNN, const my_socket *fd NNN,
+ const struct sockaddr *addr NNN, socklen_t addr_len NNN)
{
return NULL;
}
@@ -188,12 +189,12 @@ static int spawn_thread_noop(PSI_thread_key key NNN,
static PSI_thread*
new_thread_noop(PSI_thread_key key NNN,
- const void *identity NNN, ulong thread_id NNN)
+ const void *identity NNN, ulonglong thread_id NNN)
{
return NULL;
}
-static void set_thread_id_noop(PSI_thread *thread NNN, unsigned long id NNN)
+static void set_thread_id_noop(PSI_thread *thread NNN, ulonglong id NNN)
{
return;
}
@@ -401,16 +402,17 @@ static void end_table_lock_wait_noop(PSI_table_locker* locker NNN)
return;
}
-static PSI_file* start_file_open_wait_noop(PSI_file_locker *locker NNN,
- const char *src_file NNN,
- uint src_line NNN)
+static void start_file_open_wait_noop(PSI_file_locker *locker NNN,
+ const char *src_file NNN,
+ uint src_line NNN)
{
- return NULL;
+ return;
}
-static void end_file_open_wait_noop(PSI_file_locker *locker NNN)
+static PSI_file* end_file_open_wait_noop(PSI_file_locker *locker NNN,
+ void *result NNN)
{
- return;
+ return NULL;
}
static void end_file_open_wait_and_bind_to_descriptor_noop
@@ -433,6 +435,19 @@ static void end_file_wait_noop(PSI_file_locker *locker NNN,
return;
}
+static void start_file_close_wait_noop(PSI_file_locker *locker NNN,
+ const char *src_file NNN,
+ uint src_line NNN)
+{
+ return;
+}
+
+static void end_file_close_wait_noop(PSI_file_locker *locker NNN,
+ int result NNN)
+{
+ return;
+}
+
static void start_stage_noop(PSI_stage_key key NNN,
const char *src_file NNN, int src_line NNN)
{
@@ -446,7 +461,8 @@ static void end_stage_noop(void)
static PSI_statement_locker*
get_thread_statement_locker_noop(PSI_statement_locker_state *state NNN,
- PSI_statement_key key NNN)
+ PSI_statement_key key NNN,
+ const void *charset NNN)
{
return NULL;
}
@@ -621,6 +637,14 @@ digest_add_token_noop(PSI_digest_locker *locker NNN,
return NULL;
}
+static int
+set_thread_connect_attrs_noop(const char *buffer __attribute__((unused)),
+ uint length __attribute__((unused)),
+ const void *from_cs __attribute__((unused)))
+{
+ return 0;
+}
+
static PSI PSI_noop=
{
register_mutex_noop,
@@ -687,6 +711,8 @@ static PSI PSI_noop=
end_file_open_wait_and_bind_to_descriptor_noop,
start_file_wait_noop,
end_file_wait_noop,
+ start_file_close_wait_noop,
+ end_file_close_wait_noop,
start_stage_noop,
end_stage_noop,
get_thread_statement_locker_noop,
@@ -716,7 +742,8 @@ static PSI PSI_noop=
set_socket_info_noop,
set_socket_thread_owner_noop,
digest_start_noop,
- digest_add_token_noop
+ digest_add_token_noop,
+ set_thread_connect_attrs_noop
};
/**
diff --git a/mysys/rijndael.c b/mysys/rijndael.c
deleted file mode 100644
index e893a886726..00000000000
--- a/mysys/rijndael.c
+++ /dev/null
@@ -1,1379 +0,0 @@
-/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-
-
-/*
- Based on version 3.0 (December 2000)
-
- Optimised ANSI C code for the Rijndael cipher (now AES)
-
- author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
- author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
- author Paulo Barreto <paulo.barreto@terra.com.br>
-*/
-
-#include <my_global.h>
-#include "rijndael.h"
-
-/*
- Define the following to use fastest and much larger code (~10K extra code)
- #define FULL_UNROLL
-*/
-
-static const uint32 Te0[256]=
-{
- 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
- 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
- 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
- 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
- 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
- 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
- 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
- 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
- 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
- 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
- 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
- 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
- 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
- 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
- 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
- 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
- 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
- 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
- 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
- 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
- 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
- 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
- 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
- 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
- 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
- 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
- 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
- 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
- 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
- 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
- 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
- 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
- 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
- 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
- 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
- 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
- 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
- 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
- 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
- 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
- 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
- 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
- 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
- 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
- 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
- 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
- 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
- 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
- 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
- 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
- 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
- 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
- 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
- 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
- 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
- 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
- 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
- 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
- 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
- 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
- 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
- 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
- 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
- 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
-};
-
-static const uint32 Te1[256]=
-{
- 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
- 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
- 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
- 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
- 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
- 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
- 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
- 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
- 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
- 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
- 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
- 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
- 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
- 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
- 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
- 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
- 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
- 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
- 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
- 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
- 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
- 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
- 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
- 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
- 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
- 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
- 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
- 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
- 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
- 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
- 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
- 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
- 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
- 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
- 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
- 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
- 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
- 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
- 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
- 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
- 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
- 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
- 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
- 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
- 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
- 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
- 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
- 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
- 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
- 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
- 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
- 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
- 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
- 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
- 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
- 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
- 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
- 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
- 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
- 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
- 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
- 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
- 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
- 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
-};
-
-static const uint32 Te2[256]=
-{
- 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
- 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
- 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
- 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
- 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
- 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
- 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
- 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
- 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
- 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
- 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
- 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
- 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
- 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
- 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
- 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
- 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
- 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
- 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
- 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
- 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
- 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
- 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
- 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
- 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
- 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
- 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
- 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
- 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
- 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
- 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
- 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
- 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
- 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
- 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
- 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
- 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
- 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
- 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
- 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
- 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
- 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
- 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
- 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
- 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
- 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
- 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
- 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
- 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
- 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
- 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
- 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
- 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
- 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
- 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
- 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
- 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
- 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
- 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
- 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
- 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
- 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
- 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
- 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
-};
-
-static const uint32 Te3[256]=
-{
- 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
- 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
- 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
- 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
- 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
- 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
- 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
- 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
- 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
- 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
- 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
- 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
- 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
- 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
- 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
- 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
- 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
- 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
- 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
- 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
- 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
- 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
- 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
- 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
- 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
- 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
- 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
- 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
- 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
- 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
- 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
- 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
- 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
- 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
- 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
- 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
- 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
- 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
- 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
- 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
- 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
- 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
- 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
- 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
- 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
- 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
- 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
- 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
- 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
- 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
- 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
- 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
- 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
- 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
- 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
- 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
- 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
- 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
- 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
- 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
- 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
- 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
- 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
- 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
-};
-
-static const uint32 Te4[256]=
-{
- 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
- 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
- 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
- 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
- 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
- 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
- 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
- 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
- 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
- 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
- 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
- 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
- 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
- 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
- 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
- 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
- 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
- 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
- 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
- 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
- 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
- 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
- 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
- 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
- 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
- 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
- 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
- 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
- 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
- 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
- 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
- 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
- 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
- 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
- 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
- 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
- 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
- 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
- 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
- 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
- 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
- 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
- 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
- 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
- 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
- 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
- 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
- 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
- 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
- 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
- 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
- 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
- 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
- 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
- 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
- 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
- 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
- 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
- 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
- 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
- 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
- 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
- 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
- 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
-};
-
-static const uint32 Td0[256]=
-{
- 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
- 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
- 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
- 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
- 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
- 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
- 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
- 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
- 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
- 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
- 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
- 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
- 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
- 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
- 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
- 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
- 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
- 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
- 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
- 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
- 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
- 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
- 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
- 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
- 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
- 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
- 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
- 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
- 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
- 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
- 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
- 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
- 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
- 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
- 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
- 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
- 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
- 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
- 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
- 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
- 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
- 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
- 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
- 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
- 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
- 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
- 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
- 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
- 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
- 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
- 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
- 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
- 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
- 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
- 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
- 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
- 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
- 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
- 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
- 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
- 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
- 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
- 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
- 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
-};
-
-static const uint32 Td1[256]=
-{
- 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
- 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
- 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
- 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
- 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
- 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
- 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
- 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
- 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
- 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
- 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
- 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
- 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
- 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
- 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
- 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
- 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
- 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
- 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
- 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
- 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
- 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
- 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
- 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
- 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
- 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
- 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
- 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
- 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
- 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
- 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
- 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
- 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
- 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
- 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
- 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
- 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
- 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
- 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
- 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
- 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
- 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
- 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
- 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
- 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
- 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
- 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
- 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
- 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
- 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
- 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
- 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
- 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
- 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
- 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
- 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
- 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
- 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
- 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
- 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
- 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
- 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
- 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
- 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
-};
-
-static const uint32 Td2[256]=
-{
- 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
- 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
- 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
- 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
- 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
- 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
- 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
- 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
- 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
- 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
- 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
- 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
- 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
- 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
- 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
- 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
- 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
- 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
- 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
- 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-
- 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
- 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
- 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
- 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
- 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
- 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
- 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
- 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
- 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
- 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
- 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
- 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
- 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
- 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
- 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
- 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
- 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
- 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
- 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
- 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
- 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
- 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
- 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
- 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
- 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
- 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
- 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
- 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
- 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
- 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
- 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
- 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
- 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
- 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
- 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
- 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
- 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
- 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
- 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
- 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
- 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
- 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
- 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
- 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
-};
-
-static const uint32 Td3[256]=
-{
- 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
- 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
- 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
- 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
- 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
- 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
- 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
- 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
- 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
- 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
- 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
- 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
- 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
- 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
- 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
- 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
- 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
- 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
- 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
- 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
- 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
- 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
- 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
- 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
- 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
- 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
- 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
- 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
- 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
- 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
- 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
- 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
- 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
- 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
- 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
- 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
- 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
- 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
- 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
- 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
- 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
- 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
- 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
- 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
- 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
- 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
- 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
- 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
- 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
- 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
- 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
- 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
- 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
- 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
- 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
- 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
- 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
- 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
- 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
- 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
- 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
- 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
- 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
- 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
-};
-
-static const uint32 Td4[256]=
-{
- 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
- 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
- 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
- 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
- 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
- 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
- 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
- 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
- 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
- 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
- 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
- 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
- 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
- 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
- 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
- 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
- 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
- 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
- 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
- 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
- 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
- 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
- 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
- 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
- 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
- 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
- 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
- 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
- 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
- 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
- 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
- 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
- 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
- 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
- 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
- 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
- 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
- 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
- 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
- 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
- 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
- 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
- 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
- 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
- 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
- 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
- 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
- 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
- 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
- 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
- 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
- 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
- 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
- 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
- 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
- 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
- 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
- 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
- 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
- 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
- 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
- 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
- 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
- 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
-};
-
-
-/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
-static const uint32 rcon[]=
-{
- 0x01000000, 0x02000000, 0x04000000, 0x08000000,
- 0x10000000, 0x20000000, 0x40000000, 0x80000000,
- 0x1B000000, 0x36000000,
-};
-
-#if defined(_MSC_VER) && defined(__i386__)
-
-#define RJ_SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
-#define GETuint32(p) RJ_SWAP(*((uint32 *)(p)))
-#define PUTuint32(ct, st) { *((uint32 *)(ct)) = RJ_SWAP((st)); }
-
-#else
-
-#define GETuint32(pt) (((uint32)(pt)[0] << 24) ^ ((uint32)(pt)[1] << 16)\
- ^ ((uint32)(pt)[2] << 8) ^ ((uint32)(pt)[3]))
-#define PUTuint32(ct, st) { (ct)[0] = (uint8)((st) >> 24); (ct)[1]\
-= (uint8)((st) >> 16); (ct)[2] = (uint8)((st) >> 8); (ct)[3] = (uint8)(st); }
-
-#endif /* defined(_MSC_VER) && defined(__i386__) */
-
-
-/*
- Expand the cipher key into the encryption key schedule.
-
- RETURN
- The number of rounds for the given cipher key size.
-*/
-
-int rijndaelKeySetupEnc(uint32 rk[/*4*(Nr + 1)*/], const uint8 cipherKey[],
- int keyBits)
-{
- int i = 0;
- uint32 temp;
-
- rk[0] = GETuint32(cipherKey );
- rk[1] = GETuint32(cipherKey + 4);
- rk[2] = GETuint32(cipherKey + 8);
- rk[3] = GETuint32(cipherKey + 12);
- if (keyBits == 128)
- {
- for (;;)
- {
- temp = rk[3];
- rk[4] = (rk[0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[5] = rk[1] ^ rk[4];
- rk[6] = rk[2] ^ rk[5];
- rk[7] = rk[3] ^ rk[6];
- if (++i == 10)
- return 10;
- rk += 4;
- }
- }
- rk[4] = GETuint32(cipherKey + 16);
- rk[5] = GETuint32(cipherKey + 20);
- if (keyBits == 192)
- {
- for (;;)
- {
- temp = rk[ 5];
- rk[ 6] = (rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[ 7] = rk[ 1] ^ rk[ 6];
- rk[ 8] = rk[ 2] ^ rk[ 7];
- rk[ 9] = rk[ 3] ^ rk[ 8];
- if (++i == 8)
- {
- return 12;
- }
- rk[10] = rk[ 4] ^ rk[ 9];
- rk[11] = rk[ 5] ^ rk[10];
- rk += 6;
- }
- }
- rk[6] = GETuint32(cipherKey + 24);
- rk[7] = GETuint32(cipherKey + 28);
- if (keyBits == 256)
- {
- for (;;)
- {
- temp = rk[ 7];
- rk[ 8] = (rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[ 9] = rk[ 1] ^ rk[ 8];
- rk[10] = rk[ 2] ^ rk[ 9];
- rk[11] = rk[ 3] ^ rk[10];
- if (++i == 7)
- {
- return 14;
- }
- temp = rk[11];
- rk[12] = (rk[ 4] ^
- (Te4[(temp >> 24) ] & 0xff000000) ^
- (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(temp ) & 0xff] & 0x000000ff));
- rk[13] = rk[ 5] ^ rk[12];
- rk[14] = rk[ 6] ^ rk[13];
- rk[15] = rk[ 7] ^ rk[14];
- rk += 8;
- }
- }
- return 0;
-}
-
-
-/*
- Expand the cipher key into the decryption key schedule.
-
- RETURN
- The number of rounds for the given cipher key size.
-*/
-
-int rijndaelKeySetupDec(uint32 rk[/*4*(Nr + 1)*/], const uint8 cipherKey[],
- int keyBits)
-{
- int nr, i, j;
- uint32 temp;
-
- /* expand the cipher key: */
- nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits);
- /* invert the order of the round keys: */
- for (i = 0, j = 4*nr; i < j; i += 4, j -= 4)
- {
- temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
- temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
- temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
- temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
- }
- /*
- Apply the inverse MixColumn transform to all round keys but the first
- and the last:
- */
- for (i = 1; i < nr; i++)
- {
- rk += 4;
-
- rk[0]= (
- Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[0] ) & 0xff] & 0xff]);
-
- rk[1]= (Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[1] ) & 0xff] & 0xff]);
-
- rk[2]= (Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[2] ) & 0xff] & 0xff]);
-
- rk[3]= (Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[3] ) & 0xff] & 0xff]);
- }
- return nr;
-}
-
-
-void rijndaelEncrypt(const uint32 rk[/*4*(Nr + 1)*/], int Nr,
- const uint8 pt[16], uint8 ct[16])
-{
- uint32 s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
- int r;
-#endif /* FULL_UNROLL */
-
- /* map byte array block to cipher state and add initial round key: */
- s0 = GETuint32(pt ) ^ rk[0];
- s1 = GETuint32(pt + 4) ^ rk[1];
- s2 = GETuint32(pt + 8) ^ rk[2];
- s3 = GETuint32(pt + 12) ^ rk[3];
-
-#ifdef FULL_UNROLL
- /* round 1: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[ 4]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[ 5]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[ 6]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[ 7]);
-
- /* round 2: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[ 8]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[ 9]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[10]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[11]);
-
- /* round 3: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[12]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[13]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[14]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[15]);
-
- /* round 4: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[16]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[17]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[18]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[19]);
-
- /* round 5: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[20]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[21]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[22]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[23]);
-
- /* round 6: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[24]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[25]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[26]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[27]);
-
- /* round 7: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[28]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[29]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[30]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[31]);
-
- /* round 8: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[32]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[33]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[34]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[35]);
-
- /* round 9: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[36]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[37]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[38]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[39]);
-
- if (Nr > 10)
- {
- /* round 10: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[40]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[41]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[42]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[43]);
-
- /* round 11: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[44]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[45]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[46]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[47]);
-
- if (Nr > 12)
- {
- /* round 12: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[48]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[49]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[50]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[51]);
-
- /* round 13: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[52]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[53]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[54]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[55]);
- }
- }
- rk += Nr << 2;
-#else /* !FULL_UNROLL */
-
- /* Nr - 1 full rounds: */
-
- r = Nr >> 1;
- for (;;)
- {
- t0= (Te0[(s0 >> 24) ] ^
- Te1[(s1 >> 16) & 0xff] ^
- Te2[(s2 >> 8) & 0xff] ^
- Te3[(s3 ) & 0xff] ^
- rk[4]);
-
- t1= (Te0[(s1 >> 24) ] ^
- Te1[(s2 >> 16) & 0xff] ^
- Te2[(s3 >> 8) & 0xff] ^
- Te3[(s0 ) & 0xff] ^
- rk[5]);
-
- t2= (Te0[(s2 >> 24) ] ^
- Te1[(s3 >> 16) & 0xff] ^
- Te2[(s0 >> 8) & 0xff] ^
- Te3[(s1 ) & 0xff] ^
- rk[6]);
-
- t3= (Te0[(s3 >> 24) ] ^
- Te1[(s0 >> 16) & 0xff] ^
- Te2[(s1 >> 8) & 0xff] ^
- Te3[(s2 ) & 0xff] ^
- rk[7]);
-
- rk+= 8;
- if (--r == 0)
- break;
-
- s0= (Te0[(t0 >> 24) ] ^
- Te1[(t1 >> 16) & 0xff] ^
- Te2[(t2 >> 8) & 0xff] ^
- Te3[(t3 ) & 0xff] ^
- rk[0]);
-
- s1= (Te0[(t1 >> 24) ] ^
- Te1[(t2 >> 16) & 0xff] ^
- Te2[(t3 >> 8) & 0xff] ^
- Te3[(t0 ) & 0xff] ^
- rk[1]);
-
- s2= (Te0[(t2 >> 24) ] ^
- Te1[(t3 >> 16) & 0xff] ^
- Te2[(t0 >> 8) & 0xff] ^
- Te3[(t1 ) & 0xff] ^
- rk[2]);
-
- s3= (Te0[(t3 >> 24) ] ^
- Te1[(t0 >> 16) & 0xff] ^
- Te2[(t1 >> 8) & 0xff] ^
- Te3[(t2 ) & 0xff] ^
- rk[3]);
- }
-#endif /* FULL_UNROLL */
-
- /* Apply last round and map cipher state to byte array block: */
- s0= ((Te4[(t0 >> 24) ] & 0xff000000) ^
- (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[0]);
- PUTuint32(ct , s0);
-
- s1= ((Te4[(t1 >> 24) ] & 0xff000000) ^
- (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[1]);
- PUTuint32(ct + 4, s1);
-
- s2= ((Te4[(t2 >> 24) ] & 0xff000000) ^
- (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[2]);
- PUTuint32(ct + 8, s2);
-
- s3= ((Te4[(t3 >> 24) ] & 0xff000000) ^
- (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[3]);
- PUTuint32(ct + 12, s3);
-}
-
-
-void rijndaelDecrypt(const uint32 rk[/*4*(Nr + 1)*/], int Nr,
- const uint8 ct[16], uint8 pt[16])
-{
- uint32 s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
- int r;
-#endif /* FULL_UNROLL */
-
- /* Map byte array block to cipher state and add initial round key: */
-
- s0 = GETuint32(ct ) ^ rk[0];
- s1 = GETuint32(ct + 4) ^ rk[1];
- s2 = GETuint32(ct + 8) ^ rk[2];
- s3 = GETuint32(ct + 12) ^ rk[3];
-
-#ifdef FULL_UNROLL
- /* round 1: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[ 4]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[ 5]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[ 6]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[ 7]);
-
- /* round 2: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[ 8]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[ 9]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[10]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[11]);
-
- /* round 3: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[12]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[13]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[14]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[15]);
-
- /* round 4: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[16]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[17]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[18]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[19]);
-
- /* round 5: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[20]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[21]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[22]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[23]);
-
- /* round 6: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[24]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[25]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[26]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[27]);
-
- /* round 7: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[28]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[29]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[30]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[31]);
-
- /* round 8: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[32]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[33]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[34]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[35]);
-
- /* round 9: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[36]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[37]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[38]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[39]);
-
- if (Nr > 10)
- {
- /* round 10: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[40]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[41]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[42]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[43]);
-
- /* round 11: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[44]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[45]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[46]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[47]);
-
- if (Nr > 12)
- {
- /* round 12: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[48]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[49]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[50]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[51]);
-
- /* round 13: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[52]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[53]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[54]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[55]);
- }
- }
- rk += Nr << 2;
-#else /* !FULL_UNROLL */
-
- /* Nr - 1 full rounds: */
- r= (Nr >> 1);
- for (;;)
- {
- t0= (Td0[(s0 >> 24) ] ^
- Td1[(s3 >> 16) & 0xff] ^
- Td2[(s2 >> 8) & 0xff] ^
- Td3[(s1 ) & 0xff] ^
- rk[4]);
-
- t1= (Td0[(s1 >> 24) ] ^
- Td1[(s0 >> 16) & 0xff] ^
- Td2[(s3 >> 8) & 0xff] ^
- Td3[(s2 ) & 0xff] ^
- rk[5]);
-
- t2= (Td0[(s2 >> 24) ] ^
- Td1[(s1 >> 16) & 0xff] ^
- Td2[(s0 >> 8) & 0xff] ^
- Td3[(s3 ) & 0xff] ^
- rk[6]);
-
- t3= (Td0[(s3 >> 24) ] ^
- Td1[(s2 >> 16) & 0xff] ^
- Td2[(s1 >> 8) & 0xff] ^
- Td3[(s0 ) & 0xff] ^
- rk[7]);
-
- rk+= 8;
- if (--r == 0)
- break;
-
- s0= (Td0[(t0 >> 24) ] ^
- Td1[(t3 >> 16) & 0xff] ^
- Td2[(t2 >> 8) & 0xff] ^
- Td3[(t1 ) & 0xff] ^
- rk[0]);
-
- s1= (Td0[(t1 >> 24) ] ^
- Td1[(t0 >> 16) & 0xff] ^
- Td2[(t3 >> 8) & 0xff] ^
- Td3[(t2 ) & 0xff] ^
- rk[1]);
-
- s2= (Td0[(t2 >> 24) ] ^
- Td1[(t1 >> 16) & 0xff] ^
- Td2[(t0 >> 8) & 0xff] ^
- Td3[(t3 ) & 0xff] ^
- rk[2]);
-
- s3= (Td0[(t3 >> 24) ] ^
- Td1[(t2 >> 16) & 0xff] ^
- Td2[(t1 >> 8) & 0xff] ^
- Td3[(t0 ) & 0xff] ^
- rk[3]);
- }
-
-#endif /* FULL_UNROLL */
-
- /* Apply last round and map cipher state to byte array block: */
-
- s0= ((Td4[(t0 >> 24) ] & 0xff000000) ^
- (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[0]);
- PUTuint32(pt , s0);
-
- s1= ((Td4[(t1 >> 24) ] & 0xff000000) ^
- (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[1]);
- PUTuint32(pt + 4, s1);
-
- s2= ((Td4[(t2 >> 24) ] & 0xff000000) ^
- (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[2]);
- PUTuint32(pt + 8, s2);
-
- s3= ((Td4[(t3 >> 24) ] & 0xff000000) ^
- (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[3]);
- PUTuint32(pt + 12, s3);
-}
diff --git a/mysys/sha1.c b/mysys/sha1.c
deleted file mode 100644
index e5b33a9ad13..00000000000
--- a/mysys/sha1.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/* Copyright (c) 2002, 2004, 2006 MySQL AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-*/
-
-/*
- Original Source from: http://www.faqs.org/rfcs/rfc3174.html
-
- Copyright (C) The Internet Society (2001). All Rights Reserved.
-
- This document and translations of it may be copied and furnished to
- others, and derivative works that comment on or otherwise explain it
- or assist in its implementation may be prepared, copied, published
- and distributed, in whole or in part, without restriction of any
- kind, provided that the above copyright notice and this paragraph are
- included on all such copies and derivative works. However, this
- document itself may not be modified in any way, such as by removing
- the copyright notice or references to the Internet Society or other
- Internet organizations, except as needed for the purpose of
- developing Internet standards in which case the procedures for
- copyrights defined in the Internet Standards process must be
- followed, or as required to translate it into languages other than
- English.
-
- The limited permissions granted above are perpetual and will not be
- revoked by the Internet Society or its successors or assigns.
-
- This document and the information contained herein is provided on an
- "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
- TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
- BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
- HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
- MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
- Acknowledgement
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-
- DESCRIPTION
- This file implements the Secure Hashing Algorithm 1 as
- defined in FIPS PUB 180-1 published April 17, 1995.
-
- The SHA-1, produces a 160-bit message digest for a given data
- stream. It should take about 2**n steps to find a message with the
- same digest as a given message and 2**(n/2) to find any two
- messages with the same digest, when n is the digest size in bits.
- Therefore, this algorithm can serve as a means of providing a
- "fingerprint" for a message.
-
- PORTABILITY ISSUES
- SHA-1 is defined in terms of 32-bit "words". This code uses
- <stdint.h> (included via "sha1.h" to define 32 and 8 bit unsigned
- integer types. If your C compiler does not support 32 bit unsigned
- integers, this code is not appropriate.
-
- CAVEATS
- SHA-1 is designed to work with messages less than 2^64 bits long.
- Although SHA-1 allows a message digest to be generated for messages
- of any number of bits less than 2^64, this implementation only
- works with messages with a length that is a multiple of the size of
- an 8-bit character.
-
- CHANGES
- 2002 by Peter Zaitsev to
- - fit to new prototypes according to MySQL standard
- - Some optimizations
- - All checking is now done in debug only mode
- - More comments
-*/
-
-#include "my_global.h"
-#include "m_string.h"
-#include "sha1.h"
-
-/*
- Define the SHA1 circular left shift macro
-*/
-
-#define SHA1CircularShift(bits,word) \
- (((word) << (bits)) | ((word) >> (32-(bits))))
-
-/* Local Function Prototyptes */
-static void SHA1PadMessage(SHA1_CONTEXT*);
-static void SHA1ProcessMessageBlock(SHA1_CONTEXT*);
-
-
-/*
- Initialize SHA1Context
-
- SYNOPSIS
- mysql_sha1_reset()
- context [in/out] The context to reset.
-
- DESCRIPTION
- This function will initialize the SHA1Context in preparation
- for computing a new SHA1 message digest.
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-
-const uint32 sha_const_key[5]=
-{
- 0x67452301,
- 0xEFCDAB89,
- 0x98BADCFE,
- 0x10325476,
- 0xC3D2E1F0
-};
-
-
-int mysql_sha1_reset(SHA1_CONTEXT *context)
-{
-#ifndef DBUG_OFF
- if (!context)
- return SHA_NULL;
-#endif
-
- context->Length = 0;
- context->Message_Block_Index = 0;
-
- context->Intermediate_Hash[0] = sha_const_key[0];
- context->Intermediate_Hash[1] = sha_const_key[1];
- context->Intermediate_Hash[2] = sha_const_key[2];
- context->Intermediate_Hash[3] = sha_const_key[3];
- context->Intermediate_Hash[4] = sha_const_key[4];
-
- context->Computed = 0;
- context->Corrupted = 0;
-
- return SHA_SUCCESS;
-}
-
-
-/*
- Return the 160-bit message digest into the array provided by the caller
-
- SYNOPSIS
- mysql_sha1_result()
- context [in/out] The context to use to calculate the SHA-1 hash.
- Message_Digest: [out] Where the digest is returned.
-
- DESCRIPTION
- NOTE: The first octet of hash is stored in the 0th element,
- the last octet of hash in the 19th element.
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-int mysql_sha1_result(SHA1_CONTEXT *context,
- uint8 Message_Digest[SHA1_HASH_SIZE])
-{
- int i;
-
-#ifndef DBUG_OFF
- if (!context || !Message_Digest)
- return SHA_NULL;
-
- if (context->Corrupted)
- return context->Corrupted;
-#endif
-
- if (!context->Computed)
- {
- SHA1PadMessage(context);
- /* message may be sensitive, clear it out */
- bzero((char*) context->Message_Block,64);
- context->Length = 0; /* and clear length */
- context->Computed = 1;
- }
-
- for (i = 0; i < SHA1_HASH_SIZE; i++)
- Message_Digest[i] = (int8)((context->Intermediate_Hash[i>>2] >> 8
- * ( 3 - ( i & 0x03 ) )));
- return SHA_SUCCESS;
-}
-
-
-/*
- Accepts an array of octets as the next portion of the message.
-
- SYNOPSIS
- mysql_sha1_input()
- context [in/out] The SHA context to update
- message_array An array of characters representing the next portion
- of the message.
- length The length of the message in message_array
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-int mysql_sha1_input(SHA1_CONTEXT *context, const uint8 *message_array,
- unsigned length)
-{
- if (!length)
- return SHA_SUCCESS;
-
-#ifndef DBUG_OFF
- /* We assume client konows what it is doing in non-debug mode */
- if (!context || !message_array)
- return SHA_NULL;
- if (context->Computed)
- return (context->Corrupted= SHA_STATE_ERROR);
- if (context->Corrupted)
- return context->Corrupted;
-#endif
-
- while (length--)
- {
- context->Message_Block[context->Message_Block_Index++]=
- (*message_array & 0xFF);
- context->Length += 8; /* Length is in bits */
-
-#ifndef DBUG_OFF
- /*
- Then we're not debugging we assume we never will get message longer
- 2^64 bits.
- */
- if (context->Length == 0)
- return (context->Corrupted= 1); /* Message is too long */
-#endif
-
- if (context->Message_Block_Index == 64)
- {
- SHA1ProcessMessageBlock(context);
- }
- message_array++;
- }
- return SHA_SUCCESS;
-}
-
-
-/*
- Process the next 512 bits of the message stored in the Message_Block array.
-
- SYNOPSIS
- SHA1ProcessMessageBlock()
-
- DESCRIPTION
- Many of the variable names in this code, especially the single
- character names, were used because those were the names used in
- the publication.
-*/
-
-/* Constants defined in SHA-1 */
-static const uint32 K[]=
-{
- 0x5A827999,
- 0x6ED9EBA1,
- 0x8F1BBCDC,
- 0xCA62C1D6
-};
-
-
-static void SHA1ProcessMessageBlock(SHA1_CONTEXT *context)
-{
- int t; /* Loop counter */
- uint32 temp; /* Temporary word value */
- uint32 W[80]; /* Word sequence */
- uint32 A, B, C, D, E; /* Word buffers */
- int idx;
-
- /*
- Initialize the first 16 words in the array W
- */
-
- for (t = 0; t < 16; t++)
- {
- idx=t*4;
- W[t] = context->Message_Block[idx] << 24;
- W[t] |= context->Message_Block[idx + 1] << 16;
- W[t] |= context->Message_Block[idx + 2] << 8;
- W[t] |= context->Message_Block[idx + 3];
- }
-
-
- for (t = 16; t < 80; t++)
- {
- W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
- }
-
- A = context->Intermediate_Hash[0];
- B = context->Intermediate_Hash[1];
- C = context->Intermediate_Hash[2];
- D = context->Intermediate_Hash[3];
- E = context->Intermediate_Hash[4];
-
- for (t = 0; t < 20; t++)
- {
- temp= SHA1CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 20; t < 40; t++)
- {
- temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 40; t < 60; t++)
- {
- temp= (SHA1CircularShift(5,A) + ((B & C) | (B & D) | (C & D)) + E + W[t] +
- K[2]);
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 60; t < 80; t++)
- {
- temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- context->Intermediate_Hash[0] += A;
- context->Intermediate_Hash[1] += B;
- context->Intermediate_Hash[2] += C;
- context->Intermediate_Hash[3] += D;
- context->Intermediate_Hash[4] += E;
-
- context->Message_Block_Index = 0;
-}
-
-
-/*
- Pad message
-
- SYNOPSIS
- SHA1PadMessage()
- context: [in/out] The context to pad
-
- DESCRIPTION
- According to the standard, the message must be padded to an even
- 512 bits. The first padding bit must be a '1'. The last 64 bits
- represent the length of the original message. All bits in between
- should be 0. This function will pad the message according to
- those rules by filling the Message_Block array accordingly. It
- will also call the ProcessMessageBlock function provided
- appropriately. When it returns, it can be assumed that the message
- digest has been computed.
-
-*/
-
-static void SHA1PadMessage(SHA1_CONTEXT *context)
-{
- /*
- Check to see if the current message block is too small to hold
- the initial padding bits and length. If so, we will pad the
- block, process it, and then continue padding into a second
- block.
- */
-
- int i=context->Message_Block_Index;
-
- if (i > 55)
- {
- context->Message_Block[i++] = 0x80;
- bzero((char*) &context->Message_Block[i],
- sizeof(context->Message_Block[0])*(64-i));
- context->Message_Block_Index=64;
-
- /* This function sets context->Message_Block_Index to zero */
- SHA1ProcessMessageBlock(context);
-
- bzero((char*) &context->Message_Block[0],
- sizeof(context->Message_Block[0])*56);
- context->Message_Block_Index=56;
- }
- else
- {
- context->Message_Block[i++] = 0x80;
- bzero((char*) &context->Message_Block[i],
- sizeof(context->Message_Block[0])*(56-i));
- context->Message_Block_Index=56;
- }
-
- /*
- Store the message length as the last 8 octets
- */
-
- context->Message_Block[56] = (int8) (context->Length >> 56);
- context->Message_Block[57] = (int8) (context->Length >> 48);
- context->Message_Block[58] = (int8) (context->Length >> 40);
- context->Message_Block[59] = (int8) (context->Length >> 32);
- context->Message_Block[60] = (int8) (context->Length >> 24);
- context->Message_Block[61] = (int8) (context->Length >> 16);
- context->Message_Block[62] = (int8) (context->Length >> 8);
- context->Message_Block[63] = (int8) (context->Length);
-
- SHA1ProcessMessageBlock(context);
-}
diff --git a/mysys/stacktrace.c b/mysys/stacktrace.c
index 402520990b6..613911e4495 100644
--- a/mysys/stacktrace.c
+++ b/mysys/stacktrace.c
@@ -95,7 +95,7 @@ static int safe_print_str(const char *addr, int max_len)
/* Read up to the maximum number of bytes. */
while (total)
{
- count= min(sizeof(buf), total);
+ count= MY_MIN(sizeof(buf), total);
if ((nbytes= pread(fd, buf, count, offset)) < 0)
{
@@ -348,7 +348,7 @@ void my_print_stacktrace(uchar* stack_bottom, ulong thread_stack)
if (!stack_bottom || (uchar*) stack_bottom > (uchar*) &fp)
{
- ulong tmp= min(0x10000,thread_stack);
+ ulong tmp= MY_MIN(0x10000,thread_stack);
/* Assume that the stack starts at the previous even 65K */
stack_bottom= (uchar*) (((ulong) &fp + tmp) & ~(ulong) 0xFFFF);
my_safe_printf_stderr("Cannot determine thread, fp=%p, "
diff --git a/mysys/string.c b/mysys/string.c
index 1263e7824f9..42fe83ed4e1 100644
--- a/mysys/string.c
+++ b/mysys/string.c
@@ -223,77 +223,3 @@ void dynstr_reassociate(DYNAMIC_STRING *str, char **ptr, size_t *length,
*alloc_length= str->max_length;
str->str=0;
}
-
-
-/*
- copy a string from one character set to another
-
- SYNOPSIS
- copy_and_convert()
- to Store result here
- to_cs Character set of result string
- from Copy from here
- from_length Length of from string
- from_cs From character set
-
- NOTES
- 'to' must be big enough as form_length * to_cs->mbmaxlen
-
- RETURN
- length of bytes copied to 'to'
-*/
-
-uint32
-copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs,
- uint *errors)
-{
- int cnvres;
- my_wc_t wc;
- const uchar *from_end= (const uchar*) from+from_length;
- char *to_start= to;
- uchar *to_end= (uchar*) to+to_length;
- my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
- my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
- uint error_count= 0;
-
- while (1)
- {
- if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
- from_end)) > 0)
- from+= cnvres;
- else if (cnvres == MY_CS_ILSEQ)
- {
- error_count++;
- from++;
- wc= '?';
- }
- else if (cnvres > MY_CS_TOOSMALL)
- {
- /*
- A correct multibyte sequence detected
- But it doesn't have Unicode mapping.
- */
- error_count++;
- from+= (-cnvres);
- wc= '?';
- }
- else
- break; // Not enough characters
-
-outp:
- if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
- to+= cnvres;
- else if (cnvres == MY_CS_ILUNI && wc != '?')
- {
- error_count++;
- wc= '?';
- goto outp;
- }
- else
- break;
- }
- *errors= error_count;
- return (uint32) (to - to_start);
-}
diff --git a/mysys/testhash.c b/mysys/testhash.c
index ffdaaece770..3359b5dce29 100644
--- a/mysys/testhash.c
+++ b/mysys/testhash.c
@@ -79,7 +79,7 @@ static int do_test()
for (i=0 ; i < recant ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*5,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*5,MAX_RECORDS));
record= (char*) my_malloc(reclength,MYF(MY_FAE));
sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count);
if (my_hash_insert(&hash,record))
@@ -133,7 +133,7 @@ static int do_test()
printf("- Update\n");
for (i=0 ; i < write_count/10 ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*2,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*2,MAX_RECORDS));
for (j=rnd(1000) ; j>0 && key1[j] == 0 ; j--) ;
if (j)
{
diff --git a/mysys_ssl/CMakeLists.txt b/mysys_ssl/CMakeLists.txt
new file mode 100644
index 00000000000..b91988d1c8b
--- /dev/null
+++ b/mysys_ssl/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/mysys_ssl
+ ${SSL_INCLUDE_DIRS})
+
+IF(SSL_DEFINES)
+ADD_DEFINITIONS(${SSL_DEFINES})
+ENDIF()
+
+# We do RESTRICT_SYMBOL_EXPORTS(yassl) elsewhere.
+# In order to get correct symbol visibility, these files
+# must be compiled with "-fvisibility=hidden"
+IF(WITH_SSL STREQUAL "bundled" AND HAVE_VISIBILITY_HIDDEN)
+ SET_SOURCE_FILES_PROPERTIES(
+ crypt_genhash_impl.cc
+ my_aes.cc
+ my_md5.cc
+ my_sha1.cc
+ my_sha2.cc
+ PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+ENDIF()
+
+SET(MYSYS_SSL_SOURCES
+ crypt_genhash_impl.cc
+ my_aes.cc
+ my_sha1.cc
+ my_sha2.cc
+ my_md5.cc
+ my_rnd.cc
+ )
+
+ADD_CONVENIENCE_LIBRARY(mysys_ssl ${MYSYS_SSL_SOURCES})
+TARGET_LINK_LIBRARIES(mysys_ssl dbug strings ${SSL_LIBRARIES})
+DTRACE_INSTRUMENT(mysys_ssl)
diff --git a/mysys_ssl/CTestTestfile.cmake b/mysys_ssl/CTestTestfile.cmake
new file mode 100644
index 00000000000..fc98399082f
--- /dev/null
+++ b/mysys_ssl/CTestTestfile.cmake
@@ -0,0 +1,6 @@
+# CMake generated Testfile for
+# Source directory: /my/maria-10.0-merge/mysys_ssl
+# Build directory: /my/maria-10.0-merge/mysys_ssl
+#
+# This file includes the relevent testing commands required for
+# testing this directory and lists subdirectories to be tested as well.
diff --git a/mysys_ssl/cmake_install.cmake b/mysys_ssl/cmake_install.cmake
new file mode 100644
index 00000000000..9617527ed80
--- /dev/null
+++ b/mysys_ssl/cmake_install.cmake
@@ -0,0 +1,34 @@
+# Install script for directory: /my/maria-10.0-merge/mysys_ssl
+
+# Set the install prefix
+IF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+ SET(CMAKE_INSTALL_PREFIX "/usr/local/mysql")
+ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+ IF(BUILD_TYPE)
+ STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+ CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+ ELSE(BUILD_TYPE)
+ SET(CMAKE_INSTALL_CONFIG_NAME "Debug")
+ ENDIF(BUILD_TYPE)
+ MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+
+# Set the component getting installed.
+IF(NOT CMAKE_INSTALL_COMPONENT)
+ IF(COMPONENT)
+ MESSAGE(STATUS "Install component: \"${COMPONENT}\"")
+ SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+ ELSE(COMPONENT)
+ SET(CMAKE_INSTALL_COMPONENT)
+ ENDIF(COMPONENT)
+ENDIF(NOT CMAKE_INSTALL_COMPONENT)
+
+# Install shared libraries without execute permission?
+IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+ SET(CMAKE_INSTALL_SO_NO_EXE "0")
+ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+
diff --git a/mysys_ssl/crypt_genhash_impl.cc b/mysys_ssl/crypt_genhash_impl.cc
new file mode 100644
index 00000000000..ab7fdec46b9
--- /dev/null
+++ b/mysys_ssl/crypt_genhash_impl.cc
@@ -0,0 +1,454 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/* We always should include my_global first */
+
+#include <my_global.h>
+
+#ifdef HAVE_OPENSSL
+
+#ifdef HAVE_YASSL
+#include <sha.hpp>
+#include <openssl/ssl.h>
+#else
+#include <openssl/sha.h>
+#include <openssl/rand.h>
+#endif
+#include "crypt_genhash_impl.h"
+#include <string.h>
+
+#ifndef HAVE_YASSL
+#define DIGEST_CTX SHA256_CTX
+#define DIGESTInit SHA256_Init
+#define DIGESTUpdate SHA256_Update
+#define DIGESTFinal SHA256_Final
+#define DIGEST_LEN SHA256_DIGEST_LENGTH
+#else
+#define DIGEST_CTX TaoCrypt::SHA256
+#define DIGEST_LEN 32
+void DIGESTInit(DIGEST_CTX *ctx)
+{
+ ctx->Init();
+}
+
+void DIGESTUpdate(DIGEST_CTX *ctx, const void *plaintext, int len)
+{
+ ctx->Update((const TaoCrypt::byte *)plaintext, len);
+}
+
+void DIGESTFinal(void *txt, DIGEST_CTX *ctx)
+{
+ ctx->Final((TaoCrypt::byte *)txt);
+}
+
+#endif // HAVE_YASSL
+
+static const char crypt_alg_magic[] = "$5";
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+
+/**
+ Size-bounded string copying and concatenation
+ This is a replacement for STRLCPY(3)
+*/
+
+size_t
+strlcat(char *dst, const char *src, size_t siz)
+{
+ char *d= dst;
+ const char *s= src;
+ size_t n= siz;
+ size_t dlen;
+ /* Find the end of dst and adjust bytes left but don't go past end */
+ while (n-- != 0 && *d != '\0')
+ d++;
+ dlen= d - dst;
+ n= siz - dlen;
+ if (n == 0)
+ return(dlen + siz);
+ while (*s != '\0')
+ {
+ if (n != 1)
+ {
+ *d++= *s;
+ n--;
+ }
+ s++;
+ }
+ *d= '\0';
+ return(dlen + (s - src)); /* count does not include NUL */
+}
+
+static const int crypt_alg_magic_len = sizeof (crypt_alg_magic) - 1;
+
+static unsigned char b64t[] = /* 0 ... 63 => ascii - 64 */
+ "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+#define b64_from_24bit(B2, B1, B0, N) \
+{ \
+ uint32 w = ((B2) << 16) | ((B1) << 8) | (B0); \
+ int n = (N); \
+ while (--n >= 0 && ctbufflen > 0) { \
+ *p++ = b64t[w & 0x3f]; \
+ w >>= 6; \
+ ctbufflen--; \
+} \
+}
+
+#define ROUNDS "rounds="
+#define ROUNDSLEN (sizeof (ROUNDS) - 1)
+
+/**
+ Get the integer value after rounds= where ever it occurs in the string.
+ if the last char after the int is a , or $ that is fine anything else is an
+ error.
+*/
+static uint32 getrounds(const char *s)
+{
+ const char *r;
+ const char *p;
+ char *e;
+ long val;
+
+ if (s == NULL)
+ return (0);
+
+ if ((r = strstr(s, ROUNDS)) == NULL)
+ {
+ return (0);
+ }
+
+ if (strncmp(r, ROUNDS, ROUNDSLEN) != 0)
+ {
+ return (0);
+ }
+
+ p= r + ROUNDSLEN;
+ errno= 0;
+ val= strtol(p, &e, 10);
+ /*
+ An error occurred or there is non-numeric stuff at the end
+ which isn't one of the crypt(3c) special chars ',' or '$'
+ */
+ if (errno != 0 || val < 0 || !(*e == '\0' || *e == ',' || *e == '$'))
+ {
+ return (0);
+ }
+
+ return ((uint32) val);
+}
+
+/**
+ Finds the interval which envelopes the user salt in a crypt password
+ The crypt format is assumed to be $a$bbbb$cccccc\0 and the salt is found
+ by counting the delimiters and marking begin and end.
+
+ @param salt_being[in] Pointer to start of crypt passwd
+ @param salt_being[out] Pointer to first byte of the salt
+ @param salt_end[in] Pointer to the last byte in passwd
+ @param salt_end[out] Pointer to the byte immediatly following the salt ($)
+
+ @return The size of the salt identified
+*/
+
+int extract_user_salt(char **salt_begin,
+ char **salt_end)
+{
+ char *it= *salt_begin;
+ int delimiter_count= 0;
+ while(it != *salt_end)
+ {
+ if (*it == '$')
+ {
+ ++delimiter_count;
+ if (delimiter_count == 2)
+ {
+ *salt_begin= it + 1;
+ }
+ if (delimiter_count == 3)
+ break;
+ }
+ ++it;
+ }
+ *salt_end= it;
+ return *salt_end - *salt_begin;
+}
+
+const char *sha256_find_digest(char *pass)
+{
+ int sz= strlen(pass);
+ return pass + sz - SHA256_HASH_LENGTH;
+}
+
+/*
+ * Portions of the below code come from crypt_bsdmd5.so (bsdmd5.c) :
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ *
+ * $FreeBSD: crypt.c,v 1.5 1996/10/14 08:34:02 phk Exp $
+ *
+ */
+
+/*
+ * The below code implements the specification from:
+ *
+ * From http://people.redhat.com/drepper/SHA-crypt.txt
+ *
+ * Portions of the code taken from inspired by or verified against the
+ * source in the above document which is licensed as:
+ *
+ * "Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>."
+ */
+
+/*
+ Due to a Solaris namespace bug DS is a reserved word. To work around this
+ DS is undefined.
+*/
+#undef DS
+
+/* ARGSUSED4 */
+extern "C"
+char *
+my_crypt_genhash(char *ctbuffer,
+ size_t ctbufflen,
+ const char *plaintext,
+ int plaintext_len,
+ const char *switchsalt,
+ const char **params)
+{
+ int salt_len, i;
+ char *salt;
+ unsigned char A[DIGEST_LEN];
+ unsigned char B[DIGEST_LEN];
+ unsigned char DP[DIGEST_LEN];
+ unsigned char DS[DIGEST_LEN];
+ DIGEST_CTX ctxA, ctxB, ctxC, ctxDP, ctxDS;
+ int rounds = ROUNDS_DEFAULT;
+ int srounds = 0;
+ bool custom_rounds= false;
+ char *p;
+ char *P, *Pp;
+ char *S, *Sp;
+
+ /* Refine the salt */
+ salt = (char *)switchsalt;
+
+ /* skip our magic string */
+ if (strncmp((char *)salt, crypt_alg_magic, crypt_alg_magic_len) == 0)
+ {
+ salt += crypt_alg_magic_len + 1;
+ }
+
+ srounds = getrounds(salt);
+ if (srounds != 0) {
+ rounds = MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+ custom_rounds= true;
+ p = strchr(salt, '$');
+ if (p != NULL)
+ salt = p + 1;
+ }
+
+ salt_len = MIN(strcspn(salt, "$"), CRYPT_SALT_LENGTH);
+ //plaintext_len = strlen(plaintext);
+
+ /* 1. */
+ DIGESTInit(&ctxA);
+
+ /* 2. The password first, since that is what is most unknown */
+ DIGESTUpdate(&ctxA, plaintext, plaintext_len);
+
+ /* 3. Then the raw salt */
+ DIGESTUpdate(&ctxA, salt, salt_len);
+
+ /* 4. - 8. */
+ DIGESTInit(&ctxB);
+ DIGESTUpdate(&ctxB, plaintext, plaintext_len);
+ DIGESTUpdate(&ctxB, salt, salt_len);
+ DIGESTUpdate(&ctxB, plaintext, plaintext_len);
+ DIGESTFinal(B, &ctxB);
+
+ /* 9. - 10. */
+ for (i= plaintext_len; i > MIXCHARS; i -= MIXCHARS)
+ DIGESTUpdate(&ctxA, B, MIXCHARS);
+ DIGESTUpdate(&ctxA, B, i);
+
+ /* 11. */
+ for (i= plaintext_len; i > 0; i >>= 1) {
+ if ((i & 1) != 0)
+ {
+ DIGESTUpdate(&ctxA, B, MIXCHARS);
+ }
+ else
+ {
+ DIGESTUpdate(&ctxA, plaintext, plaintext_len);
+ }
+ }
+
+ /* 12. */
+ DIGESTFinal(A, &ctxA);
+
+ /* 13. - 15. */
+ DIGESTInit(&ctxDP);
+ for (i= 0; i < plaintext_len; i++)
+ DIGESTUpdate(&ctxDP, plaintext, plaintext_len);
+ DIGESTFinal(DP, &ctxDP);
+
+ /* 16. */
+ Pp= P= (char *)alloca(plaintext_len);
+ for (i= plaintext_len; i >= MIXCHARS; i -= MIXCHARS)
+ {
+ Pp= (char *)(memcpy(Pp, DP, MIXCHARS)) + MIXCHARS;
+ }
+ (void) memcpy(Pp, DP, i);
+
+ /* 17. - 19. */
+ DIGESTInit(&ctxDS);
+ for (i= 0; i < 16 + (uint8)A[0]; i++)
+ DIGESTUpdate(&ctxDS, salt, salt_len);
+ DIGESTFinal(DS, &ctxDS);
+
+ /* 20. */
+ Sp= S= (char *)alloca(salt_len);
+ for (i= salt_len; i >= MIXCHARS; i -= MIXCHARS)
+ {
+ Sp= (char *)(memcpy(Sp, DS, MIXCHARS)) + MIXCHARS;
+ }
+ (void) memcpy(Sp, DS, i);
+
+ /* 21. */
+ for (i= 0; i < rounds; i++)
+ {
+ DIGESTInit(&ctxC);
+
+ if ((i & 1) != 0)
+ {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+ else
+ {
+ if (i == 0)
+ DIGESTUpdate(&ctxC, A, MIXCHARS);
+ else
+ DIGESTUpdate(&ctxC, DP, MIXCHARS);
+ }
+
+ if (i % 3 != 0) {
+ DIGESTUpdate(&ctxC, S, salt_len);
+ }
+
+ if (i % 7 != 0) {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+
+ if ((i & 1) != 0)
+ {
+ if (i == 0)
+ DIGESTUpdate(&ctxC, A, MIXCHARS);
+ else
+ DIGESTUpdate(&ctxC, DP, MIXCHARS);
+ }
+ else
+ {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+ DIGESTFinal(DP, &ctxC);
+ }
+
+ /* 22. Now make the output string */
+ if (custom_rounds)
+ {
+ (void) snprintf(ctbuffer, ctbufflen,
+ "%s$rounds=%zu$", crypt_alg_magic, (size_t)rounds);
+ }
+ else
+ {
+ (void) snprintf(ctbuffer, ctbufflen,
+ "%s$", crypt_alg_magic);
+ }
+ (void) strncat(ctbuffer, (const char *)salt, salt_len);
+ (void) strlcat(ctbuffer, "$", ctbufflen);
+
+ p= ctbuffer + strlen(ctbuffer);
+ ctbufflen -= strlen(ctbuffer);
+
+ b64_from_24bit(DP[ 0], DP[10], DP[20], 4);
+ b64_from_24bit(DP[21], DP[ 1], DP[11], 4);
+ b64_from_24bit(DP[12], DP[22], DP[ 2], 4);
+ b64_from_24bit(DP[ 3], DP[13], DP[23], 4);
+ b64_from_24bit(DP[24], DP[ 4], DP[14], 4);
+ b64_from_24bit(DP[15], DP[25], DP[ 5], 4);
+ b64_from_24bit(DP[ 6], DP[16], DP[26], 4);
+ b64_from_24bit(DP[27], DP[ 7], DP[17], 4);
+ b64_from_24bit(DP[18], DP[28], DP[ 8], 4);
+ b64_from_24bit(DP[ 9], DP[19], DP[29], 4);
+ b64_from_24bit(0, DP[31], DP[30], 3);
+ *p= '\0';
+
+ (void) memset(A, 0, sizeof (A));
+ (void) memset(B, 0, sizeof (B));
+ (void) memset(DP, 0, sizeof (DP));
+ (void) memset(DS, 0, sizeof (DS));
+
+ return (ctbuffer);
+}
+
+
+/**
+ Generate a random string using ASCII characters but avoid seperator character.
+ Stdlib rand and srand are used to produce pseudo random numbers between
+ with about 7 bit worth of entropty between 1-127.
+*/
+extern "C"
+void generate_user_salt(char *buffer, int buffer_len)
+{
+ char *end= buffer + buffer_len - 1;
+#ifdef HAVE_YASSL
+ yaSSL::RAND_bytes((unsigned char *) buffer, buffer_len);
+#else
+ RAND_bytes((unsigned char *) buffer, buffer_len);
+#endif
+
+ /* Sequence must be a legal UTF8 string */
+ for (; buffer < end; buffer++)
+ {
+ *buffer &= 0x7f;
+ if (*buffer == '\0' || *buffer == '$')
+ *buffer= *buffer + 1;
+ }
+ /* Make sure the buffer is terminated properly */
+ *end= '\0';
+}
+
+void xor_string(char *to, int to_len, char *pattern, int pattern_len)
+{
+ int loop= 0;
+ while(loop <= to_len)
+ {
+ *(to + loop) ^= *(pattern + loop % pattern_len);
+ ++loop;
+ }
+}
+
+#endif // HAVE_OPENSSL
diff --git a/mysys_ssl/my_aes.cc b/mysys_ssl/my_aes.cc
new file mode 100644
index 00000000000..9327bc32a3b
--- /dev/null
+++ b/mysys_ssl/my_aes.cc
@@ -0,0 +1,278 @@
+/* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include <my_global.h>
+#include <m_string.h>
+#include <my_aes.h>
+
+#if defined(HAVE_YASSL)
+#include "aes.hpp"
+#include "openssl/ssl.h"
+#elif defined(HAVE_OPENSSL)
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+
+/*
+  RAII wrapper around OpenSSL's EVP_CIPHER_CTX: zero-initialized on
+  construction, cleaned up in the destructor, so the early returns in
+  my_aes_encrypt()/my_aes_decrypt() below cannot leak cipher resources.
+*/
+struct MyCipherCtx
+{
+ MyCipherCtx() { memset(&ctx, 0, sizeof(ctx)); }
+ ~MyCipherCtx() { EVP_CIPHER_CTX_cleanup(&ctx); }
+
+ EVP_CIPHER_CTX ctx;
+};
+
+enum encrypt_dir { MY_AES_ENCRYPT, MY_AES_DECRYPT };
+
+#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */
+
+/* If bad data discovered during decoding */
+#define AES_BAD_DATA -1
+
+/**
+  This is internal function just keeps joint code of Key generation
+
+  The user-supplied key is XOR-folded cyclically into a zeroed buffer of
+  AES_KEY_LENGTH / 8 bytes: keys longer than the real key size wrap
+  around, shorter keys leave the remaining bytes zero.
+
+  SYNOPSIS
+    my_aes_create_key()
+    @param key [in] Key to use for real key creation
+    @param key_length [in] Length of the key
+    @param rkey [out] Real key (used by OpenSSL/YaSSL)
+
+  @return
+    0 Ok
+    -1 Error; Note: The current implementation never returns this
+*/
+
+static int my_aes_create_key(const char *key, int key_length, uint8 *rkey)
+{
+ uint8 *rkey_end= rkey + AES_KEY_LENGTH / 8; /* Real key boundary */
+ uint8 *ptr; /* Start of the real key*/
+ const char *sptr; /* Start of the working key */
+ const char *key_end= key + key_length; /* Working key boundary*/
+
+ memset(rkey, 0, AES_KEY_LENGTH / 8); /* Set initial key */
+
+ for (ptr= rkey, sptr= key; sptr < key_end; ptr ++, sptr ++)
+ {
+ if (ptr == rkey_end)
+ /* Just loop over tmp_key until we used all key */
+ ptr= rkey;
+ *ptr ^= (uint8) *sptr;
+ }
+#ifdef AES_USE_KEY_BITS
+ /*
+ This block is intended to allow more weak encryption if application
+ build with libmysqld needs to correspond to export regulations
+ It should be never used in normal distribution as does not give
+ any speed improvement.
+ To get worse security define AES_USE_KEY_BITS to number of bits
+ you want key to be. It should be divisible by 8
+
+ WARNING: Changing this value results in changing of enryption for
+ all key lengths so altering this value will result in impossibility
+ to decrypt data encrypted with previous value
+ */
+#define AES_USE_KEY_BYTES (AES_USE_KEY_BITS/8)
+ /*
+ To get weaker key we use first AES_USE_KEY_BYTES bytes of created key
+ and cyclically copy them until we created all required key length
+ */
+ for (ptr= rkey+AES_USE_KEY_BYTES, sptr=rkey ; ptr < rkey_end;
+ ptr ++, sptr ++)
+ {
+ if (sptr == rkey + AES_USE_KEY_BYTES)
+ sptr= rkey;
+ *ptr= *sptr;
+ }
+#endif
+ return 0;
+}
+
+
+/**
+  Crypt buffer with AES encryption algorithm.
+
+  The YaSSL branch pads with PKCS#7-style bytes (each padding byte holds
+  the padding length) and always appends at least one padding byte, so
+  the output is strictly larger than the input; dest must provide at
+  least my_aes_get_size(source_length) bytes.  The OpenSSL branch relies
+  on EVP's default padding for the same layout.
+
+  SYNOPSIS
+    my_aes_encrypt()
+    @param source [in] Pointer to data for encryption
+    @param source_length [in] Size of encryption data
+    @param dest [out] Buffer to place encrypted data (must be large enough)
+    @param key [in] Key to be used for encryption
+    @param key_length [in] Length of the key. Will handle keys of any length
+
+  @return
+    >= 0 Size of encrypted data
+    < 0 Error
+*/
+
+int my_aes_encrypt(const char* source, int source_length, char* dest,
+ const char* key, int key_length)
+{
+#if defined(HAVE_YASSL)
+ TaoCrypt::AES_ECB_Encryption enc;
+ /* 128 bit block used for padding */
+ uint8 block[MY_AES_BLOCK_SIZE];
+ int num_blocks; /* number of complete blocks */
+ int i;
+#elif defined(HAVE_OPENSSL)
+ MyCipherCtx ctx;
+ int u_len, f_len;
+#endif
+
+ /* The real key to be used for encryption */
+ uint8 rkey[AES_KEY_LENGTH / 8];
+ int rc; /* result codes */
+
+ if ((rc= my_aes_create_key(key, key_length, rkey)))
+ return rc;
+
+#if defined(HAVE_YASSL)
+ /* NOTE(review): key length passed here is MY_AES_BLOCK_SIZE (16), i.e.
+ AES-128, matching EVP_aes_128_ecb() in the OpenSSL branch -- confirm
+ this stays consistent if AES_KEY_LENGTH is ever not 128 */
+ enc.SetKey((const TaoCrypt::byte *) rkey, MY_AES_BLOCK_SIZE);
+
+ num_blocks = source_length / MY_AES_BLOCK_SIZE;
+
+ for (i = num_blocks; i > 0; i--) /* Encode complete blocks */
+ {
+ enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+ source += MY_AES_BLOCK_SIZE;
+ dest += MY_AES_BLOCK_SIZE;
+ }
+
+ /* Encode the rest. We always have incomplete block */
+ char pad_len = MY_AES_BLOCK_SIZE - (source_length -
+ MY_AES_BLOCK_SIZE * num_blocks);
+ /* 16 - pad_len == number of leftover source bytes in the final block */
+ memcpy(block, source, 16 - pad_len);
+ memset(block + MY_AES_BLOCK_SIZE - pad_len, pad_len, pad_len);
+
+ enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) block,
+ MY_AES_BLOCK_SIZE);
+
+ return MY_AES_BLOCK_SIZE * (num_blocks + 1);
+#elif defined(HAVE_OPENSSL)
+ if (! EVP_EncryptInit(&ctx.ctx, EVP_aes_128_ecb(),
+ (const unsigned char *) rkey, NULL))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_EncryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len,
+ (unsigned const char *) source, source_length))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_EncryptFinal(&ctx.ctx, (unsigned char *) dest + u_len, &f_len))
+ return AES_BAD_DATA; /* Error */
+
+ return u_len + f_len;
+#endif
+}
+
+
+/**
+  DeCrypt buffer with AES encryption algorithm.
+
+  SYNOPSIS
+    my_aes_decrypt()
+    @param source [in] Pointer to data for decryption
+    @param source_length [in] Size of encrypted data; must be a non-zero
+                              multiple of MY_AES_BLOCK_SIZE
+    @param dest [out] Buffer to place decrypted data (must
+                      be large enough)
+    @param key [in] Key to be used for decryption
+    @param key_length [in] Length of the key. Will handle keys of any length
+
+  @return
+    >= 0 Size of decrypted data
+    < 0 Error (AES_BAD_DATA)
+*/
+
+int my_aes_decrypt(const char *source, int source_length, char *dest,
+ const char *key, int key_length)
+{
+#if defined(HAVE_YASSL)
+ TaoCrypt::AES_ECB_Decryption dec;
+ /* 128 bit block used for padding */
+ uint8 block[MY_AES_BLOCK_SIZE];
+ int num_blocks; /* Number of complete blocks */
+ int i;
+#elif defined(HAVE_OPENSSL)
+ MyCipherCtx ctx;
+ int u_len, f_len;
+#endif
+
+ /* The real key to be used for decryption */
+ uint8 rkey[AES_KEY_LENGTH / 8];
+ int rc; /* Result codes */
+
+ if ((rc= my_aes_create_key(key, key_length, rkey)))
+ return rc;
+
+#if defined(HAVE_YASSL)
+ dec.SetKey((const TaoCrypt::byte *) rkey, MY_AES_BLOCK_SIZE);
+
+ num_blocks = source_length / MY_AES_BLOCK_SIZE;
+
+ if ((source_length != num_blocks * MY_AES_BLOCK_SIZE) || num_blocks == 0 )
+ /* Input size has to be even and at least one block */
+ return AES_BAD_DATA;
+
+ /* Decode all but last blocks */
+ for (i = num_blocks - 1; i > 0; i--)
+ {
+ dec.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+ source += MY_AES_BLOCK_SIZE;
+ dest += MY_AES_BLOCK_SIZE;
+ }
+
+ /* Last block is decrypted into a scratch buffer because only part of
+ it (block size minus padding) is copied to dest */
+ dec.Process((TaoCrypt::byte *) block, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+
+ /* Use last char in the block as size */
+ uint pad_len = (uint) (uchar) block[MY_AES_BLOCK_SIZE - 1];
+
+ /* NOTE(review): pad_len == 0 is never produced by my_aes_encrypt but is
+ not rejected here -- confirm whether it should also be AES_BAD_DATA */
+ if (pad_len > MY_AES_BLOCK_SIZE)
+ return AES_BAD_DATA;
+ /* We could also check whole padding but we do not really need this */
+
+ memcpy(dest, block, MY_AES_BLOCK_SIZE - pad_len);
+ return MY_AES_BLOCK_SIZE * num_blocks - pad_len;
+#elif defined(HAVE_OPENSSL)
+ if (! EVP_DecryptInit(&ctx.ctx, EVP_aes_128_ecb(),
+ (const unsigned char *) rkey, NULL))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_DecryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len,
+ (unsigned const char *) source, source_length))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_DecryptFinal(&ctx.ctx, (unsigned char *) dest + u_len, &f_len))
+ return AES_BAD_DATA; /* Error */
+ return u_len + f_len;
+#endif
+}
+
+
+/**
+  Get size of buffer which will be large enough for encrypted data
+
+  Encryption always appends between 1 and MY_AES_BLOCK_SIZE padding
+  bytes, so the worst case is one whole extra block.
+
+  SYNOPSIS
+    my_aes_get_size()
+    @param source_length [in] Length of data to be encrypted
+
+  @return
+    Size of buffer required to store encrypted data
+*/
+
+int my_aes_get_size(int source_length)
+{
+ int full_blocks= source_length / MY_AES_BLOCK_SIZE;
+ return (full_blocks + 1) * MY_AES_BLOCK_SIZE;
+}
+
diff --git a/mysys_ssl/my_md5.cc b/mysys_ssl/my_md5.cc
new file mode 100644
index 00000000000..4c14366a4e3
--- /dev/null
+++ b/mysys_ssl/my_md5.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+
+ @brief
+ Wrapper functions for OpenSSL and YaSSL. Also provides a Compatibility layer
+ to make available YaSSL's MD5 implementation.
+*/
+
+#include <my_global.h>
+#include <my_md5.h>
+
+#if defined(HAVE_YASSL)
+#include "my_config.h"
+#include "md5.hpp"
+
+/* One-shot MD5 of buf via TaoCrypt; digest must hold 16 bytes */
+static void my_md5_hash(char *digest, const char *buf, int len)
+{
+ TaoCrypt::MD5 hasher;
+ hasher.Update((TaoCrypt::byte *) buf, len);
+ hasher.Final((TaoCrypt::byte *) digest);
+}
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/md5.h>
+
+/* One-shot MD5 of buf via OpenSSL; digest must hold 16 bytes */
+static void my_md5_hash(unsigned char* digest, unsigned const char *buf, int len)
+{
+ MD5_CTX ctx;
+ MD5_Init (&ctx);
+ MD5_Update (&ctx, buf, len);
+ MD5_Final (digest, &ctx);
+}
+
+#endif /* HAVE_YASSL */
+
+/**
+  Wrapper function to compute MD5 message digest.
+
+  Dispatches at compile time to the YaSSL or OpenSSL implementation
+  above; the two differ only in their parameter signedness.
+
+  @param digest [out] Computed MD5 digest, 16 bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void compute_md5_hash(char *digest, const char *buf, int len)
+{
+#if defined(HAVE_YASSL)
+ my_md5_hash(digest, buf, len);
+#elif defined(HAVE_OPENSSL)
+ my_md5_hash((unsigned char*)digest, (unsigned const char*)buf, len);
+#endif /* HAVE_YASSL */
+}
diff --git a/mysys_ssl/my_rnd.cc b/mysys_ssl/my_rnd.cc
new file mode 100644
index 00000000000..aa8fb63cd4d
--- /dev/null
+++ b/mysys_ssl/my_rnd.cc
@@ -0,0 +1,103 @@
+/*
+ Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include <my_global.h>
+#include <my_rnd.h>
+#include <m_string.h>
+
+#if defined(HAVE_YASSL)
+#if defined(YASSL_PREFIX)
+#define RAND_bytes yaRAND_bytes
+#endif /* YASSL_PREFIX */
+
+#include <openssl/ssl.h>
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/rand.h>
+#endif /* HAVE_YASSL */
+
+
+/*
+ A wrapper to use OpenSSL/yaSSL PRNGs.
+*/
+
+extern "C" {
+
+/*
+  Initialize random generator
+
+  NOTES
+    MySQL's password checks depends on this, so don't do any changes
+    that changes the random numbers that are generated!
+
+  @param rand_st [out] State to initialize
+  @param seed1   [in]  First seed, stored modulo max_value
+  @param seed2   [in]  Second seed, stored modulo max_value
+*/
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2)
+{
+#ifdef HAVE_valgrind
+ bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid uninitialized-memory warnings */
+#endif
+ rand_st->max_value= 0x3FFFFFFFL;
+ rand_st->max_value_dbl=(double) rand_st->max_value;
+ rand_st->seed1=seed1%rand_st->max_value ;
+ rand_st->seed2=seed2%rand_st->max_value;
+}
+
+/**
+  Generate random number.
+
+  Additive/multiplicative congruential step over both seeds.  The exact
+  arithmetic is part of the password-scramble protocol (see note on
+  my_rnd_init) and must not be changed.
+
+  @param rand_st [INOUT] Structure used for number generation.
+
+  @retval Generated pseudo random number, in [0, 1).
+*/
+
+double my_rnd(struct my_rnd_struct *rand_st)
+{
+ rand_st->seed1= (rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
+ rand_st->seed2= (rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
+ return (((double) rand_st->seed1) / rand_st->max_value_dbl);
+}
+
+/**
+  Generate a random number using the OpenSSL/yaSSL supplied
+  random number generator if available.
+
+  Falls back to my_rnd() when no SSL library is compiled in, or when
+  RAND_bytes reports failure.
+
+  @param rand_st [INOUT] Structure used for number generation
+                         only if none of the SSL libraries are
+                         available or the PRNG call fails.
+
+  @retval Generated random number, in [0, 1].
+*/
+
+double my_rnd_ssl(struct my_rnd_struct *rand_st)
+{
+
+#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
+ int rc;
+ unsigned int res;
+
+#if defined(HAVE_YASSL)
+ rc= yaSSL::RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#else
+ rc= RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#endif /* HAVE_YASSL */
+ if (rc)
+ return (double)res / (double)UINT_MAX;
+
+#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
+ return my_rnd(rand_st);
+}
+
+}
diff --git a/mysys_ssl/my_sha1.cc b/mysys_ssl/my_sha1.cc
new file mode 100644
index 00000000000..1c4bf7c9747
--- /dev/null
+++ b/mysys_ssl/my_sha1.cc
@@ -0,0 +1,141 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+
+ @brief
+ Wrapper functions for OpenSSL, YaSSL implementations. Also provides a
+ Compatibility layer to make available YaSSL's SHA1 implementation.
+*/
+
+#include <my_global.h>
+#include <sha1.h>
+
+#if defined(HAVE_YASSL)
+#include "sha.hpp"
+
+/**
+  Compute SHA1 message digest using YaSSL.
+
+  @param digest [out] Computed SHA1 digest; must hold SHA1_HASH_SIZE
+                      (20) bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void mysql_sha1_yassl(uint8 *digest, const char *buf, int len)
+{
+ TaoCrypt::SHA hasher;
+ hasher.Update((const TaoCrypt::byte *) buf, len);
+ hasher.Final ((TaoCrypt::byte *) digest);
+}
+
+/**
+  Compute SHA1 message digest for two messages in order to
+  emulate sha1(msg1, msg2) using YaSSL, i.e. the digest of the
+  concatenation buf1 || buf2.
+
+  @param digest [out] Computed SHA1 digest; must hold SHA1_HASH_SIZE
+                      (20) bytes
+  @param buf1   [in]  First message
+  @param len1   [in]  Length of first message
+  @param buf2   [in]  Second message
+  @param len2   [in]  Length of second message
+
+  @return void
+*/
+void mysql_sha1_multi_yassl(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2)
+{
+ TaoCrypt::SHA hasher;
+ hasher.Update((const TaoCrypt::byte *) buf1, len1);
+ hasher.Update((const TaoCrypt::byte *) buf2, len2);
+ hasher.Final((TaoCrypt::byte *) digest);
+}
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/sha.h>
+
+/* Start a new SHA1 computation; returns SHA1_Init's status (1 on success) */
+int mysql_sha1_reset(SHA_CTX *context)
+{
+ return SHA1_Init(context);
+}
+
+
+/* Feed length bytes of message_array into an ongoing SHA1 computation */
+int mysql_sha1_input(SHA_CTX *context, const uint8 *message_array,
+ unsigned length)
+{
+ return SHA1_Update(context, message_array, length);
+}
+
+
+/* Finalize the SHA1 computation, writing SHA1_HASH_SIZE bytes to
+ Message_Digest; the context is not reusable afterwards */
+int mysql_sha1_result(SHA_CTX *context,
+ uint8 Message_Digest[SHA1_HASH_SIZE])
+{
+ return SHA1_Final(Message_Digest, context);
+}
+
+#endif /* HAVE_YASSL */
+
+/**
+  Wrapper function to compute SHA1 message digest.
+
+  Dispatches at compile time to the YaSSL or OpenSSL implementation.
+
+  @param digest [out] Computed SHA1 digest, SHA1_HASH_SIZE (20) bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void compute_sha1_hash(uint8 *digest, const char *buf, int len)
+{
+#if defined(HAVE_YASSL)
+ mysql_sha1_yassl(digest, buf, len);
+#elif defined(HAVE_OPENSSL)
+ SHA_CTX sha1_context;
+
+ mysql_sha1_reset(&sha1_context);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf, len);
+ mysql_sha1_result(&sha1_context, digest);
+#endif /* HAVE_YASSL */
+}
+
+
+/**
+  Wrapper function to compute SHA1 message digest for
+  two messages in order to emulate sha1(msg1, msg2), i.e. the digest
+  of the concatenation buf1 || buf2.
+
+  @param digest [out] Computed SHA1 digest, SHA1_HASH_SIZE (20) bytes
+  @param buf1   [in]  First message
+  @param len1   [in]  Length of first message
+  @param buf2   [in]  Second message
+  @param len2   [in]  Length of second message
+
+  @return void
+*/
+void compute_sha1_hash_multi(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2)
+{
+#if defined(HAVE_YASSL)
+ mysql_sha1_multi_yassl(digest, buf1, len1, buf2, len2);
+#elif defined(HAVE_OPENSSL)
+ SHA_CTX sha1_context;
+
+ mysql_sha1_reset(&sha1_context);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf1, len1);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf2, len2);
+ mysql_sha1_result(&sha1_context, digest);
+#endif /* HAVE_YASSL */
+}
+
diff --git a/mysys_ssl/my_sha2.cc b/mysys_ssl/my_sha2.cc
new file mode 100644
index 00000000000..00200337f08
--- /dev/null
+++ b/mysys_ssl/my_sha2.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+ A compatibility layer to our built-in SSL implementation, to mimic the
+ oft-used external library, OpenSSL.
+*/
+
+#include <my_global.h>
+#include <sha2.h>
+
+#ifdef HAVE_YASSL
+
+/*
+ If TaoCrypt::SHA512 or ::SHA384 are not defined (but ::SHA256 is), it's
+ probably that neither of config.h's SIZEOF_LONG or SIZEOF_LONG_LONG are
+ 64 bits long. At present, both OpenSSL and YaSSL require 64-bit integers
+ for SHA-512. (The SIZEOF_* definitions come from autoconf's config.h .)
+*/
+
+/*
+  Generates an OpenSSL-style one-shot SHA-2 function (e.g. SHA256())
+  backed by the corresponding TaoCrypt hasher.  output_ptr must point to
+  a buffer large enough for the digest and is also the return value.
+  (No comments inside the macro: every line ends in a continuation.)
+*/
+# define GEN_YASSL_SHA2_BRIDGE(size) \
+unsigned char* SHA##size(const unsigned char *input_ptr, size_t input_length, \
+ char unsigned *output_ptr) { \
+ TaoCrypt::SHA##size hasher; \
+ \
+ hasher.Update(input_ptr, input_length); \
+ hasher.Final(output_ptr); \
+ return(output_ptr); \
+}
+
+
+/**
+ @fn SHA512
+ @fn SHA384
+ @fn SHA256
+ @fn SHA224
+
+ Instantiate an hash object, fill in the cleartext value, compute the digest,
+ and extract the result from the object.
+
+ (Generate the functions. See similar .h code for the prototypes.)
+*/
+# ifndef OPENSSL_NO_SHA512
+GEN_YASSL_SHA2_BRIDGE(512);
+GEN_YASSL_SHA2_BRIDGE(384);
+# else
+# warning Some SHA2 functionality is missing. See OPENSSL_NO_SHA512.
+# endif
+GEN_YASSL_SHA2_BRIDGE(256);
+GEN_YASSL_SHA2_BRIDGE(224);
+
+# undef GEN_YASSL_SHA2_BRIDGE
+
+#endif /* HAVE_YASSL */
diff --git a/sql-common/client.c b/sql-common/client.c
index e03f5236fef..e9e6b857dd8 100644
--- a/sql-common/client.c
+++ b/sql-common/client.c
@@ -36,7 +36,7 @@
*/
#include <my_global.h>
-
+#include <my_default.h>
#include "mysql.h"
/* Remove client convenience wrappers */
diff --git a/sql-common/client_authentication.cc b/sql-common/client_authentication.cc
new file mode 100644
index 00000000000..195f37bcc59
--- /dev/null
+++ b/sql-common/client_authentication.cc
@@ -0,0 +1,253 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates.
+ Copyright (c) 2013, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+#include <my_global.h>
+
+#if defined(HAVE_OPENSSL)
+#include "crypt_genhash_impl.h"
+#include "mysql/client_authentication.h"
+#include "m_ctype.h"
+#include "sql_common.h"
+#include "errmsg.h"
+#include "m_string.h"
+#include <string.h>
+
+#if !defined(HAVE_YASSL)
+#include <openssl/rsa.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#if defined(_WIN32) && !defined(_OPENSSL_Applink) && defined(HAVE_OPENSSL_APPLINK_C)
+#include <openssl/applink.c>
+#endif
+#endif
+#include "mysql/service_my_plugin_log.h"
+
+#define MAX_CIPHER_LENGTH 1024
+
+#if !defined(HAVE_YASSL)
+mysql_mutex_t g_public_key_mutex;
+#endif
+
+/* Client plugin init hook: create the mutex protecting the cached RSA
+ public key. The char*/size_t/va_list arguments required by the plugin
+ API are unused here. Always returns 0 (success). */
+int sha256_password_init(char *a, size_t b, int c, va_list d)
+{
+#if !defined(HAVE_YASSL)
+ mysql_mutex_init(0,&g_public_key_mutex, MY_MUTEX_INIT_SLOW);
+#endif
+ return 0;
+}
+
+/* Client plugin deinit hook: destroy the public-key cache mutex created
+ in sha256_password_init. Always returns 0 (success). */
+int sha256_password_deinit(void)
+{
+#if !defined(HAVE_YASSL)
+ mysql_mutex_destroy(&g_public_key_mutex);
+#endif
+ return 0;
+}
+
+
+#if !defined(HAVE_YASSL)
+/**
+  Reads and parse RSA public key data from a file.
+
+  The parsed key is cached in a function-local static protected by
+  g_public_key_mutex, so the file is read at most once per process.
+
+  @param mysql connection handle with file path data
+
+  @return Pointer to the RSA public key storage buffer, or NULL when no
+          key path is configured or the file cannot be read or parsed.
+*/
+
+RSA *rsa_init(MYSQL *mysql)
+{
+ static RSA *g_public_key= NULL;
+ RSA *key= NULL;
+
+ mysql_mutex_lock(&g_public_key_mutex);
+ key= g_public_key;
+ mysql_mutex_unlock(&g_public_key_mutex);
+
+ if (key != NULL)
+ return key;
+
+ FILE *pub_key_file= NULL;
+
+ /*
+ Bug fix: the last condition used to be
+ 'server_public_key_path != '\0'', which compares the POINTER against
+ NULL (duplicating the previous clause) instead of rejecting an empty
+ path string. Dereference the first character instead.
+ */
+ if (mysql->options.extension != NULL &&
+ mysql->options.extension->server_public_key_path != NULL &&
+ mysql->options.extension->server_public_key_path[0] != '\0')
+ {
+ pub_key_file= fopen(mysql->options.extension->server_public_key_path,
+ "r");
+ }
+ /* No public key is used; return 0 without errors to indicate this. */
+ else
+ return 0;
+
+ if (pub_key_file == NULL)
+ {
+ /*
+ If a key path was submitted but no key located then we print an error
+ message. Else we just report that there is no public key.
+ */
+ fprintf(stderr,"Can't locate server public key '%s'\n",
+ mysql->options.extension->server_public_key_path);
+
+ return 0;
+ }
+
+ mysql_mutex_lock(&g_public_key_mutex);
+ key= g_public_key= PEM_read_RSA_PUBKEY(pub_key_file, 0, 0, 0);
+ mysql_mutex_unlock(&g_public_key_mutex);
+ fclose(pub_key_file);
+ if (g_public_key == NULL)
+ {
+ fprintf(stderr, "Public key is not in PEM format: '%s'\n",
+ mysql->options.extension->server_public_key_path);
+ return 0;
+ }
+
+ return key;
+}
+#endif // !defined(HAVE_YASSL)
+
+/**
+  Authenticate the client using the RSA or TLS and a SHA256 salted password.
+
+  Protocol as implemented below: read the 20-byte scramble from the
+  server; then either send the password in clear over an already
+  encrypted channel, or (OpenSSL builds only) XOR it with the scramble
+  and send it RSA-OAEP encrypted with the server public key, fetching
+  that key from the server if no local key file is configured.
+
+  @param vio Provides plugin access to communication channel
+  @param mysql Client connection handler
+
+  @return Error status
+  @retval CR_ERROR An error occurred.
+  @retval CR_OK Authentication succeeded.
+*/
+
+extern "C"
+int sha256_password_auth_client(MYSQL_PLUGIN_VIO *vio, MYSQL *mysql)
+{
+ bool uses_password= mysql->passwd[0] != 0;
+#if !defined(HAVE_YASSL)
+ unsigned char encrypted_password[MAX_CIPHER_LENGTH];
+ static char request_public_key= '\1';
+ RSA *public_key= NULL;
+ bool got_public_key_from_server= false;
+#endif
+ bool connection_is_secure= false;
+ unsigned char scramble_pkt[20];
+ unsigned char *pkt;
+
+
+ DBUG_ENTER("sha256_password_auth_client");
+
+ /*
+ Get the scramble from the server because we need it when sending encrypted
+ password.
+ */
+ if (vio->read_packet(vio, &pkt) != SCRAMBLE_LENGTH)
+ {
+ DBUG_PRINT("info",("Scramble is not of correct length."));
+ DBUG_RETURN(CR_ERROR);
+ }
+ /*
+ Copy the scramble to the stack or it will be lost on the next use of the
+ net buffer.
+ */
+ memcpy(scramble_pkt, pkt, SCRAMBLE_LENGTH);
+
+ /* A negotiated SSL cipher means the channel is already encrypted */
+ if (mysql_get_ssl_cipher(mysql) != NULL)
+ connection_is_secure= true;
+
+ /* If connection isn't secure attempt to get the RSA public key file */
+ if (!connection_is_secure)
+ {
+ #if !defined(HAVE_YASSL)
+ public_key= rsa_init(mysql);
+#endif
+ }
+
+ if (!uses_password)
+ {
+ /* We're not using a password */
+ static const unsigned char zero_byte= '\0';
+ if (vio->write_packet(vio, (const unsigned char *) &zero_byte, 1))
+ DBUG_RETURN(CR_ERROR);
+ }
+ else
+ {
+ /* Password is a 0-terminated byte array ('\0' character included) */
+ unsigned int passwd_len= strlen(mysql->passwd) + 1;
+ if (!connection_is_secure)
+ {
+#if !defined(HAVE_YASSL)
+ /*
+ If no public key; request one from the server.
+ */
+ if (public_key == NULL)
+ {
+ if (vio->write_packet(vio, (const unsigned char *) &request_public_key,
+ 1))
+ DBUG_RETURN(CR_ERROR);
+
+ int pkt_len= 0;
+ unsigned char *pkt;
+ if ((pkt_len= vio->read_packet(vio, &pkt)) == -1)
+ DBUG_RETURN(CR_ERROR);
+ BIO* bio= BIO_new_mem_buf(pkt, pkt_len);
+ public_key= PEM_read_bio_RSA_PUBKEY(bio, NULL, NULL, NULL);
+ BIO_free(bio);
+ if (public_key == 0)
+ DBUG_RETURN(CR_ERROR);
+ got_public_key_from_server= true;
+ }
+
+ /* Obfuscate the plain text password with the session scramble.
+ NOTE(review): xor_string also transforms the terminating NUL
+ (its loop is '<='); the server must invert the same transform */
+ xor_string(mysql->passwd, strlen(mysql->passwd), (char *) scramble_pkt,
+ SCRAMBLE_LENGTH);
+ /* Encrypt the password and send it to the server */
+ int cipher_length= RSA_size(public_key);
+ /*
+ When using RSA_PKCS1_OAEP_PADDING the password length must be less
+ than RSA_size(rsa) - 41.
+ */
+ if (passwd_len + 41 >= (unsigned) cipher_length)
+ {
+ /* password message is too long */
+ DBUG_RETURN(CR_ERROR);
+ }
+ RSA_public_encrypt(passwd_len, (unsigned char *) mysql->passwd,
+ encrypted_password,
+ public_key, RSA_PKCS1_OAEP_PADDING);
+ if (got_public_key_from_server)
+ RSA_free(public_key);
+
+ if (vio->write_packet(vio, (uchar*) encrypted_password, cipher_length))
+ DBUG_RETURN(CR_ERROR);
+#else
+ set_mysql_extended_error(mysql, CR_AUTH_PLUGIN_ERR, unknown_sqlstate,
+ ER(CR_AUTH_PLUGIN_ERR), "sha256_password",
+ "Authentication requires SSL encryption");
+ DBUG_RETURN(CR_ERROR); // If no openssl support
+#endif
+ }
+ else
+ {
+ /* The vio is encrypted already; just send the plain text passwd */
+ if (vio->write_packet(vio, (uchar*) mysql->passwd, passwd_len))
+ DBUG_RETURN(CR_ERROR);
+ }
+
+ /* Wipe the (scramble-XORed) password copy from the handle */
+ memset(mysql->passwd, 0, passwd_len);
+ }
+
+ DBUG_RETURN(CR_OK);
+}
+
+#endif
diff --git a/sql-common/my_time.c b/sql-common/my_time.c
index fbcf52dbf19..640d52dab16 100644
--- a/sql-common/my_time.c
+++ b/sql-common/my_time.c
@@ -126,7 +126,7 @@ static int get_number(uint *val, uint *number_of_fields, const char **str,
static int get_digits(uint *val, uint *number_of_fields, const char **str,
const char *end, uint length)
{
- return get_number(val, number_of_fields, str, min(end, *str + length));
+ return get_number(val, number_of_fields, str, MY_MIN(end, *str + length));
}
static int get_punct(const char **str, const char *end)
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 8c2b6c81755..070afbd9c38 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -106,7 +106,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
ADD_DEPENDENCIES(sql GenServerSource)
DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
- mysys dbug strings vio regex
+ mysys mysys_ssl dbug strings vio regex
${LIBWRAP} ${LIBCRYPT} ${LIBDL}
${SSL_LIBRARIES})
@@ -141,7 +141,7 @@ IF(NOT WITHOUT_DYNAMIC_PLUGINS)
# incremental appears to crash from time to time,if used with /DEF option
SET_TARGET_PROPERTIES(mysqld PROPERTIES LINK_FLAGS "${mysqld_link_flags} /DEF:mysqld.def /INCREMENTAL:NO")
- FOREACH (CORELIB sql mysys dbug strings)
+ FOREACH (CORELIB sql mysys mysys_ssl dbug strings)
GET_TARGET_PROPERTY(LOC ${CORELIB} LOCATION)
FILE(TO_NATIVE_PATH ${LOC} LOC)
SET (LIB_LOCATIONS ${LIB_LOCATIONS} ${LOC})
@@ -171,7 +171,7 @@ ENDIF()
# On Solaris, some extra effort is required in order to get dtrace probes
# from static libraries
DTRACE_INSTRUMENT_STATIC_LIBS(mysqld
- "sql;mysys;${MYSQLD_STATIC_PLUGIN_LIBS}")
+ "sql;mysys;mysys_ssl;${MYSQLD_STATIC_PLUGIN_LIBS}")
SET(WITH_MYSQLD_LDFLAGS "" CACHE STRING "Additional linker flags for mysqld")
@@ -222,7 +222,7 @@ ADD_CUSTOM_COMMAND(
MYSQL_ADD_EXECUTABLE(mysql_tzinfo_to_sql tztime.cc COMPONENT Server)
SET_TARGET_PROPERTIES(mysql_tzinfo_to_sql PROPERTIES COMPILE_FLAGS "-DTZINFO2SQL")
-TARGET_LINK_LIBRARIES(mysql_tzinfo_to_sql mysys)
+TARGET_LINK_LIBRARIES(mysql_tzinfo_to_sql mysys mysys_ssl)
ADD_CUSTOM_TARGET(
GenServerSource
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 25f028e5451..5e439839bca 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -38,7 +38,7 @@
*/
struct st_debug_sync_action
{
- ulong activation_count; /* max(hit_limit, execute) */
+ ulong activation_count; /* MY_MAX(hit_limit, execute) */
ulong hit_limit; /* hits before kill query */
ulong execute; /* executes before self-clear */
ulong timeout; /* wait_for timeout */
@@ -734,6 +734,11 @@ static st_debug_sync_action *debug_sync_get_action(THD *thd,
static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
{
+ if(!thd)
+ {
+ return;
+ }
+
st_debug_sync_control *ds_control= thd->debug_sync_control;
bool is_dsp_now= FALSE;
DBUG_ENTER("debug_sync_set_action");
@@ -741,7 +746,7 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
DBUG_ASSERT(action);
DBUG_ASSERT(ds_control);
- action->activation_count= max(action->hit_limit, action->execute);
+ action->activation_count= MY_MAX(action->hit_limit, action->execute);
if (!action->activation_count)
{
debug_sync_remove_action(ds_control, action);
@@ -1521,9 +1526,10 @@ static void debug_sync_execute(THD *thd, st_debug_sync_action *action)
static void debug_sync(THD *thd, const char *sync_point_name, size_t name_len)
{
if (!thd)
- thd= current_thd;
- if (!thd)
- return;
+ {
+ if (!(thd= current_thd))
+ return;
+ }
st_debug_sync_control *ds_control= thd->debug_sync_control;
st_debug_sync_action *action;
diff --git a/sql/derror.cc b/sql/derror.cc
index 665427f45bc..abe642bea79 100644
--- a/sql/derror.cc
+++ b/sql/derror.cc
@@ -146,8 +146,8 @@ bool read_texts(const char *file_name, const char *language,
const char ***point, uint error_messages)
{
register uint i;
- uint count,funktpos,textcount;
- size_t length;
+ uint count,funktpos;
+ size_t offset, length;
File file;
char name[FN_REFLEN];
char lang_path[FN_REFLEN];
@@ -186,9 +186,8 @@ bool read_texts(const char *file_name, const char *language,
goto err;
funktpos=2;
if (head[0] != (uchar) 254 || head[1] != (uchar) 254 ||
- head[2] != 2 || head[3] != 2)
+ head[2] != 2 || head[3] != 3)
goto err; /* purecov: inspected */
- textcount=head[4];
error_message_charset_info= system_charset_info;
length=uint4korr(head+6); count=uint2korr(head+10);
@@ -203,7 +202,7 @@ Error message file '%s' had only %d error messages, but it should contain at lea
}
if (!(*point= (const char**)
- my_malloc((size_t) (max(length,count*2)+count*sizeof(char*)),MYF(0))))
+ my_malloc((size_t) (MY_MAX(length,count*2)+count*sizeof(char*)),MYF(0))))
{
funktpos=3; /* purecov: inspected */
goto err; /* purecov: inspected */
@@ -212,18 +211,15 @@ Error message file '%s' had only %d error messages, but it should contain at lea
if (mysql_file_read(file, buff, (size_t) count*2, MYF(MY_NABP)))
goto err;
- for (i=0, pos= buff ; i< count ; i++)
+ for (i=0, offset=0, pos= buff ; i< count ; i++)
{
- (*point)[i]= (char*) buff+uint2korr(pos);
+ (*point)[i]= (char*) buff+offset;
+ offset+= uint2korr(pos);
pos+=2;
}
if (mysql_file_read(file, buff, length, MYF(MY_NABP)))
goto err;
- for (i=1 ; i < textcount ; i++)
- {
- point[i]= *point +uint2korr(head+10+i+i);
- }
(void) mysql_file_close(file, MYF(0));
i= check_error_mesg(file_name, *point);
diff --git a/sql/field.cc b/sql/field.cc
index 1769e4e55cb..1ae5c95ad56 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -68,7 +68,7 @@ const char field_separator=',';
#define LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE 128
#define DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE 128
#define BLOB_PACK_LENGTH_TO_MAX_LENGH(arg) \
-((ulong) ((LL(1) << min(arg, 4) * 8) - LL(1)))
+((ulong) ((LL(1) << MY_MIN(arg, 4) * 8) - LL(1)))
#define ASSERT_COLUMN_MARKED_FOR_READ DBUG_ASSERT(!table || (!table->read_set || bitmap_is_set(table->read_set, field_index)))
#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(is_stat_field || !table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index)))
@@ -1070,7 +1070,7 @@ static void push_numerical_conversion_warning(THD* thd, const char* str,
const char* field_name="UNKNOWN",
ulong row_num=0)
{
- char buf[max(max(DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE,
+ char buf[MY_MAX(MY_MAX(DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE,
LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE),
DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE)];
@@ -2147,7 +2147,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
tmp_uint=tmp_dec+(uint)(int_digits_end-int_digits_from);
else if (expo_sign_char == '-')
{
- tmp_uint=min(exponent,(uint)(int_digits_end-int_digits_from));
+ tmp_uint=MY_MIN(exponent,(uint)(int_digits_end-int_digits_from));
frac_digits_added_zeros=exponent-tmp_uint;
int_digits_end -= tmp_uint;
frac_digits_head_end=int_digits_end+tmp_uint;
@@ -2155,7 +2155,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
}
else // (expo_sign_char=='+')
{
- tmp_uint=min(exponent,(uint)(frac_digits_end-frac_digits_from));
+ tmp_uint=MY_MIN(exponent,(uint)(frac_digits_end-frac_digits_from));
int_digits_added_zeros=exponent-tmp_uint;
int_digits_tail_from=frac_digits_from;
frac_digits_from=frac_digits_from+tmp_uint;
@@ -2574,7 +2574,7 @@ Field *Field_new_decimal::create_from_item (Item *item)
{
signed int overflow;
- dec= min(dec, DECIMAL_MAX_SCALE);
+ dec= MY_MIN(dec, DECIMAL_MAX_SCALE);
/*
If the value still overflows the field with the corrected dec,
@@ -2590,7 +2590,7 @@ Field *Field_new_decimal::create_from_item (Item *item)
overflow= required_length - len;
if (overflow > 0)
- dec= max(0, dec - overflow); // too long, discard fract
+ dec= MY_MAX(0, dec - overflow); // too long, discard fract
else
/* Corrected value fits. */
len= required_length;
@@ -3139,7 +3139,7 @@ String *Field_tiny::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,5*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,5*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
@@ -3321,7 +3321,7 @@ String *Field_short::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,7*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,7*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
short j;
@@ -3511,7 +3511,7 @@ String *Field_medium::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,10*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,10*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
long j= unsigned_flag ? (long) uint3korr(ptr) : sint3korr(ptr);
@@ -3700,7 +3700,7 @@ String *Field_long::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,12*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,12*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
int32 j;
@@ -3850,7 +3850,7 @@ String *Field_longlong::val_str(String *val_buffer,
{
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,22*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,22*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
longlong j;
@@ -6379,7 +6379,7 @@ void Field_string::sql_type(String &res) const
uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length)
{
- uint length= min(field_length,max_length);
+ uint length= MY_MIN(field_length,max_length);
uint local_char_length= max_length/field_charset->mbmaxlen;
DBUG_PRINT("debug", ("Packing field '%s' - length: %u ", field_name, length));
@@ -7126,7 +7126,7 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
from= tmpstr.ptr();
}
- new_length= min(max_data_length(), field_charset->mbmaxlen * length);
+ new_length= MY_MIN(max_data_length(), field_charset->mbmaxlen * length);
if (value.alloc(new_length))
goto oom_error;
@@ -7286,7 +7286,7 @@ int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr,
b_length=get_length(b_ptr);
if (b_length > max_length)
b_length=max_length;
- diff=memcmp(a,b,min(a_length,b_length));
+ diff=memcmp(a,b,MY_MIN(a_length,b_length));
return diff ? diff : (int) (a_length - b_length);
}
@@ -7464,7 +7464,7 @@ uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length)
length given is smaller than the actual length of the blob, we
just store the initial bytes of the blob.
*/
- store_length(to, packlength, min(length, max_length));
+ store_length(to, packlength, MY_MIN(length, max_length));
/*
Store the actual blob data, which will occupy 'length' bytes.
@@ -8342,7 +8342,7 @@ String *Field_bit::val_str(String *val_buffer,
{
ASSERT_COLUMN_MARKED_FOR_READ;
char buff[sizeof(longlong)];
- uint length= min(pack_length(), sizeof(longlong));
+ uint length= MY_MIN(pack_length(), sizeof(longlong));
ulonglong bits= val_int();
mi_int8store(buff,bits);
@@ -8430,7 +8430,7 @@ uint Field_bit::get_key_image(uchar *buff, uint length, imagetype type_arg)
*buff++= bits;
length--;
}
- uint data_length = min(length, bytes_in_rec);
+ uint data_length = MY_MIN(length, bytes_in_rec);
memcpy(buff, ptr, data_length);
return data_length + 1;
}
@@ -8554,7 +8554,7 @@ Field_bit::pack(uchar *to, const uchar *from, uint max_length)
uchar bits= get_rec_bits(bit_ptr + (from - ptr), bit_ofs, bit_len);
*to++= bits;
}
- length= min(bytes_in_rec, max_length - (bit_len > 0));
+ length= MY_MIN(bytes_in_rec, max_length - (bit_len > 0));
memcpy(to, from, length);
return to + length;
}
diff --git a/sql/field.h b/sql/field.h
index e832928b114..162812adfba 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -464,32 +464,53 @@ public:
*/
virtual void sql_type(String &str) const =0;
virtual uint size_of() const =0; // For new field
- inline bool is_null(my_ptrdiff_t row_offset= 0)
- { return null_ptr ? (null_ptr[row_offset] & null_bit ? 1 : 0) : table->null_row; }
- inline bool is_real_null(my_ptrdiff_t row_offset= 0)
+ inline bool is_null(my_ptrdiff_t row_offset= 0) const
+ {
+ /*
+ The table may have been marked as containing only NULL values
+ for all fields if it is a NULL-complemented row of an OUTER JOIN
+ or if the query is an implicitly grouped query (has aggregate
+ functions but no GROUP BY clause) with no qualifying rows. If
+ this is the case (in which TABLE::null_row is true), the field
+ is considered to be NULL.
+ Note that if a table->null_row is set then also all null_bits are
+ set for the row.
+
+ Otherwise, if the field is NULLable, it has a valid null_ptr
+ pointer, and its NULLity is recorded in the "null_bit" bit of
+ null_ptr[row_offset].
+ */
+ return (table->null_row ? TRUE :
+ null_ptr ? test(null_ptr[row_offset] & null_bit) : 0);
+ }
+ inline bool is_real_null(my_ptrdiff_t row_offset= 0) const
{ return null_ptr ? (null_ptr[row_offset] & null_bit ? 1 : 0) : 0; }
- inline bool is_null_in_record(const uchar *record)
+ inline bool is_null_in_record(const uchar *record) const
{
if (!null_ptr)
return 0;
return test(record[(uint) (null_ptr -table->record[0])] &
null_bit);
}
- inline bool is_null_in_record_with_offset(my_ptrdiff_t col_offset)
- {
- if (!null_ptr)
- return 0;
- return test(null_ptr[col_offset] & null_bit);
- }
inline void set_null(my_ptrdiff_t row_offset= 0)
{ if (null_ptr) null_ptr[row_offset]|= null_bit; }
inline void set_notnull(my_ptrdiff_t row_offset= 0)
{ if (null_ptr) null_ptr[row_offset]&= (uchar) ~null_bit; }
- inline bool maybe_null(void) { return null_ptr != 0 || table->maybe_null; }
- /**
- Signals that this field is NULL-able.
- */
- inline bool real_maybe_null(void) { return null_ptr != 0; }
+ inline bool maybe_null(void) const
+ { return null_ptr != 0 || table->maybe_null; }
+
+ /* @return true if this field is NULL-able, false otherwise. */
+ inline bool real_maybe_null(void) const { return null_ptr != 0; }
+ uint null_offset(const uchar *record) const
+ { return (uint) (null_ptr - record); }
+
+ uint null_offset() const
+ { return null_offset(table->record[0]); }
+ void set_null_ptr(uchar *p_null_ptr, uint p_null_bit)
+ {
+ null_ptr= p_null_ptr;
+ null_bit= p_null_bit;
+ }
inline THD *get_thd() { return table ? table->in_use : current_thd; }
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 49aaa0af574..9195255c363 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -245,12 +245,12 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
{
DBUG_PRINT("info", ("filesort PQ is not applicable"));
- ulong min_sort_memory= max(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2);
+ ulong min_sort_memory= MY_MAX(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2);
set_if_bigger(min_sort_memory, sizeof(BUFFPEK*)*MERGEBUFF2);
while (memory_available >= min_sort_memory)
{
ulong keys= memory_available / (param.rec_length + sizeof(char*));
- param.max_keys_per_buffer= (uint) min(num_rows, keys);
+ param.max_keys_per_buffer= (uint) MY_MIN(num_rows, keys);
if (table_sort.get_sort_keys())
{
// If we have already allocated a buffer, it better have same size!
@@ -1368,7 +1368,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
(length= rec_length*count),
@@ -1693,7 +1693,7 @@ int merge_buffers(Sort_param *param, IO_CACHE *from_file,
!= -1 && error != 0);
end:
- lastbuff->count= min(org_max_rows-max_rows, param->max_rows);
+ lastbuff->count= MY_MIN(org_max_rows-max_rows, param->max_rows);
lastbuff->file_pos= to_start_filepos;
err:
delete_queue(&queue);
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 6fc30fa4fa0..3c0bdc724c1 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -930,7 +930,7 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field,
DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u",
(long) blob_ptr, blob_len));
- DBUG_DUMP("value", blob_ptr, min(blob_len, 26));
+ DBUG_DUMP("value", blob_ptr, MY_MIN(blob_len, 26));
if (set_blob_value)
*set_blob_value= TRUE;
diff --git a/sql/handler.cc b/sql/handler.cc
index 5297a8e8cfc..2e0ccc5e1e5 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -3076,13 +3076,25 @@ void handler::ha_release_auto_increment()
}
-void handler::print_keydup_error(uint key_nr, const char *msg, myf errflag)
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @param table TABLE object which record buffer should be used as
+ source for column values.
+ @param key Key description.
+ @param msg Error message template to which key value should be
+ added.
+ @param errflag Flags for my_error() call.
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
{
/* Write the duplicated key in the error message */
- char key[MAX_KEY_LENGTH];
- String str(key,sizeof(key),system_charset_info);
+ char key_buff[MAX_KEY_LENGTH];
+ String str(key_buff,sizeof(key_buff),system_charset_info);
- if (key_nr == MAX_KEY)
+ if (key == NULL)
{
/* Key is unknown */
str.copy("", 0, system_charset_info);
@@ -3091,18 +3103,29 @@ void handler::print_keydup_error(uint key_nr, const char *msg, myf errflag)
else
{
/* Table is opened and defined at this point */
- key_unpack(&str,table,(uint) key_nr);
+ key_unpack(&str,table, key);
uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
if (str.length() >= max_length)
{
str.length(max_length-4);
str.append(STRING_WITH_LEN("..."));
}
- my_printf_error(ER_DUP_ENTRY, msg,
- errflag, str.c_ptr_safe(), table->key_info[key_nr].name);
+ my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
}
}
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @sa print_keydup_error(table, key, msg, errflag).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag)
+{
+ print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
/**
Print error that we got from handler function.
diff --git a/sql/handler.h b/sql/handler.h
index 8ee1044f10c..e74a2c6c42c 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -40,6 +40,8 @@
#error MAX_KEY is too large. Values up to 128 are supported.
#endif
+class Alter_info;
+
// the following is for checking tables
#define HA_ADMIN_ALREADY_DONE 1
@@ -57,6 +59,22 @@
#define HA_ADMIN_NEEDS_ALTER -11
#define HA_ADMIN_NEEDS_CHECK -12
+/**
+ Return values for check_if_supported_inplace_alter().
+
+ @see check_if_supported_inplace_alter() for description of
+ the individual values.
+*/
+enum enum_alter_inplace_result {
+ HA_ALTER_ERROR,
+ HA_ALTER_INPLACE_NOT_SUPPORTED,
+ HA_ALTER_INPLACE_EXCLUSIVE_LOCK,
+ HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE,
+ HA_ALTER_INPLACE_SHARED_LOCK,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE,
+ HA_ALTER_INPLACE_NO_LOCK
+};
+
/* Bits in table_flags() to show what database can do */
#define HA_NO_TRANSACTIONS (1 << 0) /* Doesn't support transactions */
@@ -84,7 +102,7 @@
*/
#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1 << 6)
#define HA_NULL_IN_KEY (1 << 7) /* One can have keys with NULL */
-#define HA_DUPLICATE_POS (1 << 8) /* ha_position() gives dup row */
+#define HA_DUPLICATE_POS (1 << 8) /* position() gives dup row */
#define HA_NO_BLOBS (1 << 9) /* Doesn't support blobs */
#define HA_CAN_INDEX_BLOBS (1 << 10)
#define HA_AUTO_PART_KEY (1 << 11) /* auto-increment in multi-part key */
@@ -97,8 +115,8 @@
#define HA_CAN_INSERT_DELAYED (1 << 14)
/*
If we get the primary key columns for free when we do an index read
- It also implies that we have to retrive the primary key when using
- position() and rnd_pos().
+ (usually, it also implies that HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
+ flag is set).
*/
#define HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15)
/*
@@ -430,6 +448,45 @@ enum enum_binlog_command {
/* The following two are used by Maria engine: */
#define HA_CREATE_USED_TRANSACTIONAL (1L << 20)
#define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21)
+/** This is set whenever STATS_PERSISTENT=0|1|default has been
+specified in CREATE/ALTER TABLE. See also HA_OPTION_STATS_PERSISTENT in
+include/my_base.h. It is possible to distinguish whether
+STATS_PERSISTENT=default has been specified or no STATS_PERSISTENT= is
+given at all. */
+#define HA_CREATE_USED_STATS_PERSISTENT (1L << 22)
+/**
+ This is set whenever STATS_AUTO_RECALC=0|1|default has been
+ specified in CREATE/ALTER TABLE. See enum_stats_auto_recalc.
+ It is possible to distinguish whether STATS_AUTO_RECALC=default
+ has been specified or no STATS_AUTO_RECALC= is given at all.
+*/
+#define HA_CREATE_USED_STATS_AUTO_RECALC (1L << 23)
+/**
+ This is set whenever STATS_SAMPLE_PAGES=N|default has been
+ specified in CREATE/ALTER TABLE. It is possible to distinguish whether
+ STATS_SAMPLE_PAGES=default has been specified or no STATS_SAMPLE_PAGES= is
+ given at all.
+*/
+#define HA_CREATE_USED_STATS_SAMPLE_PAGES (1L << 24)
+
+
+/*
+ This is master database for most of system tables. However there
+ can be other databases which can hold system tables. Respective
+ storage engines define their own system database names.
+*/
+extern const char *mysqld_system_database;
+
+/*
+ Structure to hold list of system_database.system_table.
+ This is used at both mysqld and storage engine layer.
+*/
+struct st_system_tablename
+{
+ const char *db;
+ const char *tablename;
+};
+
typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MYSQL_XID_PREFIX "MySQLXid"
@@ -1105,15 +1162,74 @@ struct handlerton
const char *name);
uint32 license; /* Flag for Engine License */
+ void *data; /* Location for engines to keep personal structures */
+
/*
Optional clauses in the CREATE/ALTER TABLE
*/
ha_create_table_option *table_options; // table level options
ha_create_table_option *field_options; // these are specified per field
ha_create_table_option *index_options; // these are specified per index
-
};
+/**
+ The handler supports read before write removal optimization
+
+ Read before write removal may be used for storage engines which support
+ write without previous read of the row to be updated. Handler returning
+ this flag must implement start_read_removal() and end_read_removal().
+ The handler may return "fake" rows constructed from the key of the row
+ asked for. This is used to optimize UPDATE and DELETE by reducing the
+  number of roundtrips between handler and storage engine.
+
+ Example:
+ UPDATE a=1 WHERE pk IN (<keys>)
+
+ mysql_update()
+ {
+ if (<conditions for starting read removal>)
+ start_read_removal()
+ -> handler returns true if read removal supported for this table/query
+
+ while(read_record("pk=<key>"))
+ -> handler returns fake row with column "pk" set to <key>
+
+ ha_update_row()
+ -> handler sends write "a=1" for row with "pk=<key>"
+
+ end_read_removal()
+ -> handler returns the number of rows actually written
+ }
+
+ @note This optimization in combination with batching may be used to
+ remove even more roundtrips.
+*/
+#define HA_READ_BEFORE_WRITE_REMOVAL (LL(1) << 38)
+
+/*
+ Engine supports extended fulltext API
+ */
+#define HA_CAN_FULLTEXT_EXT (LL(1) << 39)
+
+/*
+ Storage engine doesn't synchronize result set with expected table contents.
+ Used by replication slave to check if it is possible to retrieve rows from
+ the table when deciding whether to do a full table scan, index scan or hash
+ scan while applying a row event.
+ */
+#define HA_READ_OUT_OF_SYNC (LL(1) << 40)
+
+/*
+ Storage engine supports table export using the
+ FLUSH TABLE <table_list> FOR EXPORT statement.
+ */
+#define HA_CAN_EXPORT (LL(1) << 41)
+
+/*
+  The handler doesn't want accesses to this table to
+ be const-table optimized
+*/
+#define HA_BLOCK_CONST_TABLE (LL(1) << 42)
inline LEX_STRING *hton_name(const handlerton *hton)
{
@@ -1315,6 +1431,10 @@ struct st_partition_iter;
enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
+ HA_STATS_AUTO_RECALC_ON,
+ HA_STATS_AUTO_RECALC_OFF };
+
typedef struct st_ha_create_information
{
CHARSET_INFO *table_charset, *default_table_charset;
@@ -1329,6 +1449,9 @@ typedef struct st_ha_create_information
ulong avg_row_length;
ulong used_fields;
ulong key_block_size;
+ uint stats_sample_pages; /* number of pages to sample during
+ stats estimation, if used, otherwise 0. */
+ enum_stats_auto_recalc stats_auto_recalc;
SQL_I_List<TABLE_LIST> merge_list;
handlerton *db_type;
/**
@@ -1358,12 +1481,306 @@ typedef struct st_ha_create_information
} HA_CREATE_INFO;
+/**
+ In-place alter handler context.
+
+ This is a superclass intended to be subclassed by individual handlers
+ in order to store handler unique context between in-place alter API calls.
+
+ The handler is responsible for creating the object. This can be done
+ as early as during check_if_supported_inplace_alter().
+
+ The SQL layer is responsible for destroying the object.
+ The class extends Sql_alloc so the memory will be mem root allocated.
+
+ @see Alter_inplace_info
+*/
+
+class inplace_alter_handler_ctx : public Sql_alloc
+{
+public:
+ inplace_alter_handler_ctx() {}
+
+ virtual ~inplace_alter_handler_ctx() {}
+};
+
+
+/**
+ Class describing changes to be done by ALTER TABLE.
+ Instance of this class is passed to storage engine in order
+ to determine if this ALTER TABLE can be done using in-place
+ algorithm. It is also used for executing the ALTER TABLE
+ using in-place algorithm.
+*/
+
+class Alter_inplace_info
+{
+public:
+ /**
+ Bits to show in detail what operations the storage engine is
+ to execute.
+
+ All these operations are supported as in-place operations by the
+ SQL layer. This means that operations that by their nature must
+ be performed by copying the table to a temporary table, will not
+ have their own flags here (e.g. ALTER TABLE FORCE, ALTER TABLE
+ ENGINE).
+
+ We generally try to specify handler flags only if there are real
+ changes. But in cases when it is cumbersome to determine if some
+ attribute has really changed we might choose to set flag
+ pessimistically, for example, relying on parser output only.
+ */
+ typedef ulong HA_ALTER_FLAGS;
+
+ // Add non-unique, non-primary index
+ static const HA_ALTER_FLAGS ADD_INDEX = 1L << 0;
+
+ // Drop non-unique, non-primary index
+ static const HA_ALTER_FLAGS DROP_INDEX = 1L << 1;
+
+ // Add unique, non-primary index
+ static const HA_ALTER_FLAGS ADD_UNIQUE_INDEX = 1L << 2;
+
+ // Drop unique, non-primary index
+ static const HA_ALTER_FLAGS DROP_UNIQUE_INDEX = 1L << 3;
+
+ // Add primary index
+ static const HA_ALTER_FLAGS ADD_PK_INDEX = 1L << 4;
+
+ // Drop primary index
+ static const HA_ALTER_FLAGS DROP_PK_INDEX = 1L << 5;
+
+ // Add column
+ static const HA_ALTER_FLAGS ADD_COLUMN = 1L << 6;
+
+ // Drop column
+ static const HA_ALTER_FLAGS DROP_COLUMN = 1L << 7;
+
+ // Rename column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NAME = 1L << 8;
+
+ // Change column datatype
+ static const HA_ALTER_FLAGS ALTER_COLUMN_TYPE = 1L << 9;
+
+ /**
+ Change column datatype in such way that new type has compatible
+ packed representation with old type, so it is theoretically
+ possible to perform change by only updating data dictionary
+ without changing table rows.
+ */
+ static const HA_ALTER_FLAGS ALTER_COLUMN_EQUAL_PACK_LENGTH = 1L << 10;
+
+ // Reorder column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_ORDER = 1L << 11;
+
+ // Change column from NOT NULL to NULL
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NULLABLE = 1L << 12;
+
+ // Change column from NULL to NOT NULL
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NOT_NULLABLE = 1L << 13;
+
+ // Set or remove default column value
+ static const HA_ALTER_FLAGS ALTER_COLUMN_DEFAULT = 1L << 14;
+
+ // Add foreign key
+ static const HA_ALTER_FLAGS ADD_FOREIGN_KEY = 1L << 15;
+
+ // Drop foreign key
+ static const HA_ALTER_FLAGS DROP_FOREIGN_KEY = 1L << 16;
+
+ // table_options changed, see HA_CREATE_INFO::used_fields for details.
+ static const HA_ALTER_FLAGS CHANGE_CREATE_OPTION = 1L << 17;
+
+ // Table is renamed
+ static const HA_ALTER_FLAGS ALTER_RENAME = 1L << 18;
+
+ // Change the storage type of column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_STORAGE_TYPE = 1L << 19;
+
+ // Change the column format of column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_COLUMN_FORMAT = 1L << 20;
+
+ // Add partition
+ static const HA_ALTER_FLAGS ADD_PARTITION = 1L << 21;
+
+ // Drop partition
+ static const HA_ALTER_FLAGS DROP_PARTITION = 1L << 22;
+
+ // Changing partition options
+ static const HA_ALTER_FLAGS ALTER_PARTITION = 1L << 23;
+
+ // Coalesce partition
+ static const HA_ALTER_FLAGS COALESCE_PARTITION = 1L << 24;
+
+ // Reorganize partition ... into
+ static const HA_ALTER_FLAGS REORGANIZE_PARTITION = 1L << 25;
+
+ // Reorganize partition
+ static const HA_ALTER_FLAGS ALTER_TABLE_REORG = 1L << 26;
+
+ // Remove partitioning
+ static const HA_ALTER_FLAGS ALTER_REMOVE_PARTITIONING = 1L << 27;
+
+ // Partition operation with ALL keyword
+ static const HA_ALTER_FLAGS ALTER_ALL_PARTITION = 1L << 28;
+
+ /**
+ Create options (like MAX_ROWS) for the new version of table.
+
+ @note The referenced instance of HA_CREATE_INFO object was already
+ used to create new .FRM file for table being altered. So it
+ has been processed by mysql_prepare_create_table() already.
+ For example, this means that it has HA_OPTION_PACK_RECORD
+ flag in HA_CREATE_INFO::table_options member correctly set.
+ */
+ HA_CREATE_INFO *create_info;
+
+ /**
+ Alter options, fields and keys for the new version of table.
+
+ @note The referenced instance of Alter_info object was already
+ used to create new .FRM file for table being altered. So it
+ has been processed by mysql_prepare_create_table() already.
+ In particular, this means that in Create_field objects for
+ fields which were present in some form in the old version
+ of table, Create_field::field member points to corresponding
+ Field instance for old version of table.
+ */
+ Alter_info *alter_info;
+
+ /**
+ Array of KEYs for new version of table - including KEYs to be added.
+
+ @note Currently this array is produced as result of
+ mysql_prepare_create_table() call.
+ This means that it follows different convention for
+ KEY_PART_INFO::fieldnr values than objects in TABLE::key_info
+ array.
+
+ @todo This is mainly due to the fact that we need to keep compatibility
+ with removed handler::add_index() call. We plan to switch to
+ TABLE::key_info numbering later.
+
+ KEYs are sorted - see sort_keys().
+ */
+ KEY *key_info_buffer;
+
+ /** Size of key_info_buffer array. */
+ uint key_count;
+
+ /** Size of index_drop_buffer array. */
+ uint index_drop_count;
+
+ /**
+ Array of pointers to KEYs to be dropped belonging to the TABLE instance
+ for the old version of the table.
+ */
+ KEY **index_drop_buffer;
+
+ /** Size of index_add_buffer array. */
+ uint index_add_count;
+
+ /**
+ Array of indexes into key_info_buffer for KEYs to be added,
+ sorted in increasing order.
+ */
+ uint *index_add_buffer;
+
+ /**
+ Context information to allow handlers to keep context between in-place
+ alter API calls.
+
+ @see inplace_alter_handler_ctx for information about object lifecycle.
+ */
+ inplace_alter_handler_ctx *handler_ctx;
+
+ /**
+ Flags describing in detail which operations the storage engine is to execute.
+ */
+ HA_ALTER_FLAGS handler_flags;
+
+ /**
+ Partition_info taking into account the partition changes to be performed.
+ Contains all partitions which are present in the old version of the table
+ with partitions to be dropped or changed marked as such + all partitions
+ to be added in the new version of table marked as such.
+ */
+ partition_info *modified_part_info;
+
+ /** true for ALTER IGNORE TABLE ... */
+ const bool ignore;
+
+ /** true for online operation (LOCK=NONE) */
+ bool online;
+
+ /**
+ Can be set by handler to describe why a given operation cannot be done
+ in-place (HA_ALTER_INPLACE_NOT_SUPPORTED) or why it cannot be done
+ online (HA_ALTER_INPLACE_NO_LOCK or
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE)
+ If set, it will be used with ER_ALTER_OPERATION_NOT_SUPPORTED_REASON if
+ results from handler::check_if_supported_inplace_alter() doesn't match
+ requirements set by user. If not set, the more generic
+ ER_ALTER_OPERATION_NOT_SUPPORTED will be used.
+
+ Please set to a properly localized string, for example using
+ my_get_err_msg(), so that the error message as a whole is localized.
+ */
+ const char *unsupported_reason;
+
+ Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
+ Alter_info *alter_info_arg,
+ KEY *key_info_arg, uint key_count_arg,
+ partition_info *modified_part_info_arg,
+ bool ignore_arg)
+ : create_info(create_info_arg),
+ alter_info(alter_info_arg),
+ key_info_buffer(key_info_arg),
+ key_count(key_count_arg),
+ index_drop_count(0),
+ index_drop_buffer(NULL),
+ index_add_count(0),
+ index_add_buffer(NULL),
+ handler_ctx(NULL),
+ handler_flags(0),
+ modified_part_info(modified_part_info_arg),
+ ignore(ignore_arg),
+ online(false),
+ unsupported_reason(NULL)
+ {}
+
+ ~Alter_inplace_info()
+ {
+ delete handler_ctx;
+ }
+
+ /**
+ Used after check_if_supported_inplace_alter() to report
+ error if the result does not match the LOCK/ALGORITHM
+ requirements set by the user.
+
+ @param not_supported Part of statement that was not supported.
+ @param try_instead Suggestion as to what the user should
+ replace not_supported with.
+ */
+ void report_unsupported_error(const char *not_supported,
+ const char *try_instead);
+};
+
+
typedef struct st_key_create_information
{
enum ha_key_alg algorithm;
ulong block_size;
LEX_STRING parser_name;
LEX_STRING comment;
+ /**
+ A flag to determine if we will check for duplicate indexes.
+ This typically means that the key information was specified
+ directly by the user (set by the parser).
+ */
+ bool check_for_duplicate_indexes;
} KEY_CREATE_INFO;
@@ -2060,7 +2477,6 @@ public:
void adjust_next_insert_id_after_explicit_value(ulonglong nr);
int update_auto_increment();
- void print_keydup_error(uint key_nr, const char *msg, myf errflag);
virtual void print_error(int error, myf errflag);
virtual bool get_error_message(int error, String *buf);
uint get_dup_key(int error);
@@ -2557,15 +2973,15 @@ public:
{ return (HA_ERR_WRONG_COMMAND); }
uint max_record_length() const
- { return min(HA_MAX_REC_LENGTH, max_supported_record_length()); }
+ { return MY_MIN(HA_MAX_REC_LENGTH, max_supported_record_length()); }
uint max_keys() const
- { return min(MAX_KEY, max_supported_keys()); }
+ { return MY_MIN(MAX_KEY, max_supported_keys()); }
uint max_key_parts() const
- { return min(MAX_REF_PARTS, max_supported_key_parts()); }
+ { return MY_MIN(MAX_REF_PARTS, max_supported_key_parts()); }
uint max_key_length() const
- { return min(MAX_KEY_LENGTH, max_supported_key_length()); }
+ { return MY_MIN(MAX_KEY_LENGTH, max_supported_key_length()); }
uint max_key_part_length() const
- { return min(MAX_KEY_LENGTH, max_supported_key_part_length()); }
+ { return MY_MIN(MAX_KEY_LENGTH, max_supported_key_part_length()); }
virtual uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; }
virtual uint max_supported_keys() const { return 0; }
@@ -3192,4 +3608,7 @@ inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
}
+
+void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag);
+void print_keydup_error(TABLE *table, KEY *key, myf errflag);
#endif
diff --git a/sql/item.cc b/sql/item.cc
index 665521c641e..80b3269dc63 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -105,7 +105,7 @@ void
Hybrid_type_traits_decimal::fix_length_and_dec(Item *item, Item *arg) const
{
item->decimals= arg->decimals;
- item->max_length= min(arg->max_length + DECIMAL_LONGLONG_DIGITS,
+ item->max_length= MY_MIN(arg->max_length + DECIMAL_LONGLONG_DIGITS,
DECIMAL_MAX_STR_LENGTH);
}
@@ -531,9 +531,9 @@ uint Item::decimal_precision() const
uint prec=
my_decimal_length_to_precision(max_char_length(), decimals,
unsigned_flag);
- return min(prec, DECIMAL_MAX_PRECISION);
+ return MY_MIN(prec, DECIMAL_MAX_PRECISION);
}
- return min(max_char_length(), DECIMAL_MAX_PRECISION);
+ return MY_MIN(max_char_length(), DECIMAL_MAX_PRECISION);
}
@@ -977,7 +977,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
{
char buff[SAFE_NAME_LEN];
strmake(buff, str_start,
- min(sizeof(buff)-1, length + (int) (str-str_start)));
+ MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));
if (length == 0)
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -998,7 +998,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= res_length;
}
else
- name= sql_strmake(str, (name_length= min(length,MAX_ALIAS_NAME)));
+ name= sql_strmake(str, (name_length= MY_MIN(length,MAX_ALIAS_NAME)));
}
@@ -6157,7 +6157,7 @@ longlong Item_hex_string::val_int()
// following assert is redundant, because fixed=1 assigned in constructor
DBUG_ASSERT(fixed == 1);
char *end=(char*) str_value.ptr()+str_value.length(),
- *ptr=end-min(str_value.length(),sizeof(longlong));
+ *ptr=end-MY_MIN(str_value.length(),sizeof(longlong));
ulonglong value=0;
for (; ptr != end ; ptr++)
@@ -6212,7 +6212,7 @@ warn:
void Item_hex_string::print(String *str, enum_query_type query_type)
{
char *end= (char*) str_value.ptr() + str_value.length(),
- *ptr= end - min(str_value.length(), sizeof(longlong));
+ *ptr= end - MY_MIN(str_value.length(), sizeof(longlong));
str->append("0x");
for (; ptr != end ; ptr++)
{
@@ -9295,14 +9295,14 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
/* fix variable decimals which always is NOT_FIXED_DEC */
if (Field::result_merge_type(fld_type) == INT_RESULT)
item_decimals= 0;
- decimals= max(decimals, item_decimals);
+ decimals= MY_MAX(decimals, item_decimals);
}
if (Field::result_merge_type(fld_type) == DECIMAL_RESULT)
{
- decimals= min(max(decimals, item->decimals), DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(MY_MAX(decimals, item->decimals), DECIMAL_MAX_SCALE);
int item_int_part= item->decimal_int_part();
- int item_prec = max(prev_decimal_int_part, item_int_part) + decimals;
- int precision= min(item_prec, DECIMAL_MAX_PRECISION);
+ int item_prec = MY_MAX(prev_decimal_int_part, item_int_part) + decimals;
+ int precision= MY_MIN(item_prec, DECIMAL_MAX_PRECISION);
unsigned_flag&= item->unsigned_flag;
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
@@ -9333,7 +9333,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
*/
if (collation.collation != &my_charset_bin)
{
- max_length= max(old_max_chars * collation.collation->mbmaxlen,
+ max_length= MY_MAX(old_max_chars * collation.collation->mbmaxlen,
display_length(item) /
item->collation.collation->mbmaxlen *
collation.collation->mbmaxlen);
@@ -9355,7 +9355,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
{
int delta1= max_length_orig - decimals_orig;
int delta2= item->max_length - item->decimals;
- max_length= max(delta1, delta2) + decimals;
+ max_length= MY_MAX(delta1, delta2) + decimals;
if (fld_type == MYSQL_TYPE_FLOAT && max_length > FLT_DIG + 2)
{
max_length= MAX_FLOAT_STR_LENGTH;
@@ -9373,7 +9373,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
break;
}
default:
- max_length= max(max_length, display_length(item));
+ max_length= MY_MAX(max_length, display_length(item));
};
maybe_null|= item->maybe_null;
get_full_info(item);
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index ce396736d6f..a08ae8d8403 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -71,7 +71,7 @@ Cached_item::~Cached_item() {}
Cached_item_str::Cached_item_str(THD *thd, Item *arg)
:item(arg),
- value_max_length(min(arg->max_length, thd->variables.max_sort_length)),
+ value_max_length(MY_MIN(arg->max_length, thd->variables.max_sort_length)),
value(value_max_length)
{}
@@ -81,7 +81,7 @@ bool Cached_item_str::cmp(void)
bool tmp;
if ((res=item->val_str(&tmp_value)))
- res->length(min(res->length(), value_max_length));
+ res->length(MY_MIN(res->length(), value_max_length));
if (null_value != item->null_value)
{
if ((null_value= item->null_value))
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 3b09da68927..d49af9bc2a0 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -679,7 +679,7 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
{
if ((*a)->decimals < NOT_FIXED_DEC && (*b)->decimals < NOT_FIXED_DEC)
{
- precision= 5 / log_10[max((*a)->decimals, (*b)->decimals) + 1];
+ precision= 5 / log_10[MY_MAX((*a)->decimals, (*b)->decimals) + 1];
if (func == &Arg_comparator::compare_real)
func= &Arg_comparator::compare_real_fixed;
else if (func == &Arg_comparator::compare_e_real)
@@ -1019,7 +1019,7 @@ int Arg_comparator::compare_binary_string()
owner->null_value= 0;
uint res1_length= res1->length();
uint res2_length= res2->length();
- int cmp= memcmp(res1->ptr(), res2->ptr(), min(res1_length,res2_length));
+ int cmp= memcmp(res1->ptr(), res2->ptr(), MY_MIN(res1_length,res2_length));
return cmp ? cmp : (int) (res1_length - res2_length);
}
}
@@ -2377,7 +2377,7 @@ Item_func_ifnull::fix_length_and_dec()
uint32 char_length;
agg_result_type(&hybrid_type, args, 2);
maybe_null=args[1]->maybe_null;
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
unsigned_flag= args[0]->unsigned_flag && args[1]->unsigned_flag;
if (hybrid_type == DECIMAL_RESULT || hybrid_type == INT_RESULT)
@@ -2388,10 +2388,10 @@ Item_func_ifnull::fix_length_and_dec()
int len1= args[1]->max_char_length() - args[1]->decimals
- (args[1]->unsigned_flag ? 0 : 1);
- char_length= max(len0, len1) + decimals + (unsigned_flag ? 0 : 1);
+ char_length= MY_MAX(len0, len1) + decimals + (unsigned_flag ? 0 : 1);
}
else
- char_length= max(args[0]->max_char_length(), args[1]->max_char_length());
+ char_length= MY_MAX(args[0]->max_char_length(), args[1]->max_char_length());
switch (hybrid_type) {
case STRING_RESULT:
@@ -2418,9 +2418,9 @@ uint Item_func_ifnull::decimal_precision() const
{
int arg0_int_part= args[0]->decimal_int_part();
int arg1_int_part= args[1]->decimal_int_part();
- int max_int_part= max(arg0_int_part, arg1_int_part);
+ int max_int_part= MY_MAX(arg0_int_part, arg1_int_part);
int precision= max_int_part + decimals;
- return min(precision, DECIMAL_MAX_PRECISION);
+ return MY_MIN(precision, DECIMAL_MAX_PRECISION);
}
@@ -2597,7 +2597,7 @@ Item_func_if::fix_length_and_dec()
agg_result_type(&cached_result_type, args + 1, 2);
maybe_null= args[1]->maybe_null || args[2]->maybe_null;
- decimals= max(args[1]->decimals, args[2]->decimals);
+ decimals= MY_MAX(args[1]->decimals, args[2]->decimals);
unsigned_flag=args[1]->unsigned_flag && args[2]->unsigned_flag;
if (cached_result_type == STRING_RESULT)
@@ -2621,10 +2621,10 @@ Item_func_if::fix_length_and_dec()
int len2= args[2]->max_length - args[2]->decimals
- (args[2]->unsigned_flag ? 0 : 1);
- char_length= max(len1, len2) + decimals + (unsigned_flag ? 0 : 1);
+ char_length= MY_MAX(len1, len2) + decimals + (unsigned_flag ? 0 : 1);
}
else
- char_length= max(args[1]->max_char_length(), args[2]->max_char_length());
+ char_length= MY_MAX(args[1]->max_char_length(), args[2]->max_char_length());
fix_char_length(char_length);
}
@@ -2633,8 +2633,8 @@ uint Item_func_if::decimal_precision() const
{
int arg1_prec= args[1]->decimal_int_part();
int arg2_prec= args[2]->decimal_int_part();
- int precision=max(arg1_prec,arg2_prec) + decimals;
- return min(precision, DECIMAL_MAX_PRECISION);
+ int precision=MY_MAX(arg1_prec,arg2_prec) + decimals;
+ return MY_MIN(precision, DECIMAL_MAX_PRECISION);
}
@@ -2935,7 +2935,7 @@ bool Item_func_case::fix_fields(THD *thd, Item **ref)
void Item_func_case::agg_str_lengths(Item* arg)
{
- fix_char_length(max(max_char_length(), arg->max_char_length()));
+ fix_char_length(MY_MAX(max_char_length(), arg->max_char_length()));
set_if_bigger(decimals, arg->decimals);
unsigned_flag= unsigned_flag && arg->unsigned_flag;
}
@@ -3135,7 +3135,7 @@ uint Item_func_case::decimal_precision() const
if (else_expr_num != -1)
set_if_bigger(max_int_part, args[else_expr_num]->decimal_int_part());
- return min(max_int_part + decimals, DECIMAL_MAX_PRECISION);
+ return MY_MIN(max_int_part + decimals, DECIMAL_MAX_PRECISION);
}
@@ -5095,7 +5095,7 @@ void Item_func_like::turboBM_compute_suffixes(int *suff)
else
{
if (i < g)
- g = i; // g = min(i, g)
+ g = i; // g = MY_MIN(i, g)
f = i;
while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
g--;
@@ -5114,7 +5114,7 @@ void Item_func_like::turboBM_compute_suffixes(int *suff)
else
{
if (i < g)
- g = i; // g = min(i, g)
+ g = i; // g = MY_MIN(i, g)
f = i;
while (g >= 0 &&
likeconv(cs, pattern[g]) == likeconv(cs, pattern[g + plm1 - f]))
@@ -5235,14 +5235,14 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
register const int v = plm1 - i;
turboShift = u - v;
bcShift = bmBc[(uint) (uchar) text[i + j]] - plm1 + i;
- shift = max(turboShift, bcShift);
- shift = max(shift, bmGs[i]);
+ shift = MY_MAX(turboShift, bcShift);
+ shift = MY_MAX(shift, bmGs[i]);
if (shift == bmGs[i])
- u = min(pattern_len - shift, v);
+ u = MY_MIN(pattern_len - shift, v);
else
{
if (turboShift < bcShift)
- shift = max(shift, u + 1);
+ shift = MY_MAX(shift, u + 1);
u = 0;
}
j+= shift;
@@ -5266,14 +5266,14 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
register const int v = plm1 - i;
turboShift = u - v;
bcShift = bmBc[(uint) likeconv(cs, text[i + j])] - plm1 + i;
- shift = max(turboShift, bcShift);
- shift = max(shift, bmGs[i]);
+ shift = MY_MAX(turboShift, bcShift);
+ shift = MY_MAX(shift, bmGs[i]);
if (shift == bmGs[i])
- u = min(pattern_len - shift, v);
+ u = MY_MIN(pattern_len - shift, v);
else
{
if (turboShift < bcShift)
- shift = max(shift, u + 1);
+ shift = MY_MAX(shift, u + 1);
u = 0;
}
j+= shift;
diff --git a/sql/item_create.cc b/sql/item_create.cc
index fc31b074055..1475a44f32a 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -55,7 +55,7 @@ static void wrong_precision_error(uint errcode, Item *a,
char buff[1024];
String buf(buff, sizeof(buff), system_charset_info);
- my_error(errcode, MYF(0), (uint) min(number, UINT_MAX32),
+ my_error(errcode, MYF(0), (uint) MY_MIN(number, UINT_MAX32),
item_name(a, &buf), maximum);
}
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 390ece724cb..1692f2b3d89 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -635,7 +635,7 @@ void Item_func::count_decimal_length()
set_if_bigger(max_int_part, args[i]->decimal_int_part());
set_if_smaller(unsigned_flag, args[i]->unsigned_flag);
}
- int precision= min(max_int_part + decimals, DECIMAL_MAX_PRECISION);
+ int precision= MY_MIN(max_int_part + decimals, DECIMAL_MAX_PRECISION);
fix_char_length(my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag));
@@ -1371,10 +1371,10 @@ my_decimal *Item_func_plus::decimal_op(my_decimal *decimal_value)
*/
void Item_func_additive_op::result_precision()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
int arg1_int= args[0]->decimal_precision() - args[0]->decimals;
int arg2_int= args[1]->decimal_precision() - args[1]->decimals;
- int precision= max(arg1_int, arg2_int) + 1 + decimals;
+ int precision= MY_MAX(arg1_int, arg2_int) + 1 + decimals;
/* Integer operations keep unsigned_flag if one of arguments is unsigned */
if (result_type() == INT_RESULT)
@@ -1612,9 +1612,9 @@ void Item_func_mul::result_precision()
unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag;
else
unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
- decimals= min(args[0]->decimals + args[1]->decimals, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + args[1]->decimals, DECIMAL_MAX_SCALE);
uint est_prec = args[0]->decimal_precision() + args[1]->decimal_precision();
- uint precision= min(est_prec, DECIMAL_MAX_PRECISION);
+ uint precision= MY_MIN(est_prec, DECIMAL_MAX_PRECISION);
max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
unsigned_flag);
}
@@ -1666,7 +1666,7 @@ my_decimal *Item_func_div::decimal_op(my_decimal *decimal_value)
void Item_func_div::result_precision()
{
- uint precision=min(args[0]->decimal_precision() +
+ uint precision=MY_MIN(args[0]->decimal_precision() +
args[1]->decimals + prec_increment,
DECIMAL_MAX_PRECISION);
@@ -1675,7 +1675,7 @@ void Item_func_div::result_precision()
unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag;
else
unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
unsigned_flag);
}
@@ -1689,7 +1689,7 @@ void Item_func_div::fix_length_and_dec()
switch (hybrid_type) {
case REAL_RESULT:
{
- decimals=max(args[0]->decimals,args[1]->decimals)+prec_increment;
+ decimals=MY_MAX(args[0]->decimals,args[1]->decimals)+prec_increment;
set_if_smaller(decimals, NOT_FIXED_DEC);
uint tmp=float_length(decimals);
if (decimals == NOT_FIXED_DEC)
@@ -1878,8 +1878,8 @@ my_decimal *Item_func_mod::decimal_op(my_decimal *decimal_value)
void Item_func_mod::result_precision()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
- max_length= max(args[0]->max_length, args[1]->max_length);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
+ max_length= MY_MAX(args[0]->max_length, args[1]->max_length);
}
@@ -2424,7 +2424,7 @@ void Item_func_round::fix_length_and_dec()
if (args[0]->decimals == NOT_FIXED_DEC)
{
- decimals= min(decimals_to_set, NOT_FIXED_DEC);
+ decimals= MY_MIN(decimals_to_set, NOT_FIXED_DEC);
max_length= float_length(decimals);
hybrid_type= REAL_RESULT;
return;
@@ -2434,7 +2434,7 @@ void Item_func_round::fix_length_and_dec()
case REAL_RESULT:
case STRING_RESULT:
hybrid_type= REAL_RESULT;
- decimals= min(decimals_to_set, NOT_FIXED_DEC);
+ decimals= MY_MIN(decimals_to_set, NOT_FIXED_DEC);
max_length= float_length(decimals);
break;
case INT_RESULT:
@@ -2451,13 +2451,13 @@ void Item_func_round::fix_length_and_dec()
case DECIMAL_RESULT:
{
hybrid_type= DECIMAL_RESULT;
- decimals_to_set= min(DECIMAL_MAX_SCALE, decimals_to_set);
+ decimals_to_set= MY_MIN(DECIMAL_MAX_SCALE, decimals_to_set);
int decimals_delta= args[0]->decimals - decimals_to_set;
int precision= args[0]->decimal_precision();
int length_increase= ((decimals_delta <= 0) || truncate) ? 0:1;
precision-= decimals_delta - length_increase;
- decimals= min(decimals_to_set, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(decimals_to_set, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
@@ -2568,7 +2568,7 @@ my_decimal *Item_func_round::decimal_op(my_decimal *decimal_value)
my_decimal val, *value= args[0]->val_decimal(&val);
longlong dec= args[1]->val_int();
if (dec >= 0 || args[1]->unsigned_flag)
- dec= min((ulonglong) dec, decimals);
+ dec= MY_MIN((ulonglong) dec, decimals);
else if (dec < INT_MIN)
dec= INT_MIN;
@@ -3428,7 +3428,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
free_udf(u_d);
DBUG_RETURN(TRUE);
}
- func->max_length=min(initid.max_length,MAX_BLOB_WIDTH);
+ func->max_length=MY_MIN(initid.max_length,MAX_BLOB_WIDTH);
func->maybe_null=initid.maybe_null;
const_item_cache=initid.const_item;
/*
@@ -3437,7 +3437,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
*/
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
- func->decimals=min(initid.decimals,NOT_FIXED_DEC);
+ func->decimals=MY_MIN(initid.decimals,NOT_FIXED_DEC);
}
initialized=1;
if (error)
diff --git a/sql/item_func.h b/sql/item_func.h
index f562c87fe1c..d7c065e56f3 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -563,7 +563,7 @@ public:
const char *func_name() const { return "cast_as_unsigned"; }
void fix_length_and_dec()
{
- fix_char_length(min(args[0]->max_char_length(),
+ fix_char_length(MY_MIN(args[0]->max_char_length(),
DECIMAL_MAX_PRECISION + 2));
unsigned_flag=1;
}
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 5071e494f04..0aafe2c3a74 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -585,7 +585,7 @@ String *Item_func_concat::val_str(String *str)
}
else
{
- uint new_len = max(tmp_value.alloced_length() * 2, concat_len);
+ uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len);
if (tmp_value.realloc(new_len))
goto null;
@@ -934,7 +934,7 @@ String *Item_func_concat_ws::val_str(String *str)
}
else
{
- uint new_len = max(tmp_value.alloced_length() * 2, concat_len);
+ uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len);
if (tmp_value.realloc(new_len))
goto null;
@@ -1426,7 +1426,7 @@ String *Item_func_substr::val_str(String *str)
length= res->charpos((int) length, (uint32) start);
tmp_length= res->length() - start;
- length= min(length, tmp_length);
+ length= MY_MIN(length, tmp_length);
if (!start && (longlong) res->length() == length)
return res;
@@ -1449,7 +1449,7 @@ void Item_func_substr::fix_length_and_dec()
else if (start < 0)
max_length= ((uint)(-start) > max_length) ? 0 : (uint)(-start);
else
- max_length-= min((uint)(start - 1), max_length);
+ max_length-= MY_MIN((uint)(start - 1), max_length);
}
if (arg_count == 3 && args[2]->const_item())
{
@@ -2143,7 +2143,7 @@ String *Item_func_soundex::val_str(String *str)
if ((null_value= args[0]->null_value))
return 0; /* purecov: inspected */
- if (tmp_value.alloc(max(res->length(), 4 * cs->mbminlen)))
+ if (tmp_value.alloc(MY_MAX(res->length(), 4 * cs->mbminlen)))
return str; /* purecov: inspected */
char *to= (char *) tmp_value.ptr();
char *to_end= to + tmp_value.alloced_length();
@@ -3363,7 +3363,7 @@ String* Item_func_export_set::val_str(String* str)
const ulong max_allowed_packet= current_thd->variables.max_allowed_packet;
const uint num_separators= num_set_values > 0 ? num_set_values - 1 : 0;
const ulonglong max_total_length=
- num_set_values * max(yes->length(), no->length()) +
+ num_set_values * MY_MAX(yes->length(), no->length()) +
num_separators * sep->length();
if (unlikely(max_total_length > max_allowed_packet))
@@ -3392,11 +3392,11 @@ String* Item_func_export_set::val_str(String* str)
void Item_func_export_set::fix_length_and_dec()
{
- uint32 length= max(args[1]->max_char_length(), args[2]->max_char_length());
+ uint32 length= MY_MAX(args[1]->max_char_length(), args[2]->max_char_length());
uint32 sep_length= (arg_count > 3 ? args[3]->max_char_length() : 1);
if (agg_arg_charsets_for_string_result(collation,
- args + 1, min(4, arg_count) - 1))
+ args + 1, MY_MIN(4, arg_count) - 1))
return;
fix_char_length(length * 64 + sep_length * 63);
}
@@ -4464,7 +4464,7 @@ longlong Item_dyncol_get::val_int()
if (end != org_end || error > 0)
{
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_BAD_DATA,
@@ -4528,7 +4528,7 @@ double Item_dyncol_get::val_real()
error)
{
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_BAD_DATA,
@@ -4584,7 +4584,7 @@ my_decimal *Item_dyncol_get::val_decimal(my_decimal *decimal_value)
rc= str2my_decimal(0, val.x.string.value.str, val.x.string.value.length,
val.x.string.charset, decimal_value);
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
if (rc != E_DEC_OK)
{
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 486b7cf36ef..c6b8397100b 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -812,7 +812,7 @@ public:
collation.set(args[0]->collation);
ulonglong max_result_length= (ulonglong) args[0]->max_length * 2 +
2 * collation.collation->mbmaxlen;
- max_length= (uint32) min(max_result_length, MAX_BLOB_WIDTH);
+ max_length= (uint32) MY_MIN(max_result_length, MAX_BLOB_WIDTH);
}
};
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 8816e1352a9..165f9f4a5f8 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -36,7 +36,7 @@
ulonglong Item_sum::ram_limitation(THD *thd)
{
- return min(thd->variables.tmp_table_size,
+ return MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size);
}
@@ -1581,16 +1581,16 @@ void Item_sum_avg::fix_length_and_dec()
if (hybrid_type == DECIMAL_RESULT)
{
int precision= args[0]->decimal_precision() + prec_increment;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
- f_precision= min(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
+ f_precision= MY_MIN(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
f_scale= args[0]->decimals;
dec_bin_size= my_decimal_get_binary_size(f_precision, f_scale);
}
else {
- decimals= min(args[0]->decimals + prec_increment, NOT_FIXED_DEC);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, NOT_FIXED_DEC);
max_length= args[0]->max_length + prec_increment;
}
}
@@ -1787,13 +1787,13 @@ void Item_sum_variance::fix_length_and_dec()
switch (args[0]->result_type()) {
case REAL_RESULT:
case STRING_RESULT:
- decimals= min(args[0]->decimals + 4, NOT_FIXED_DEC);
+ decimals= MY_MIN(args[0]->decimals + 4, NOT_FIXED_DEC);
break;
case INT_RESULT:
case DECIMAL_RESULT:
{
int precision= args[0]->decimal_precision()*2 + prec_increment;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
@@ -3488,7 +3488,7 @@ bool Item_func_group_concat::setup(THD *thd)
syntax of this function). If there is no ORDER BY clause, we don't
create this tree.
*/
- init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
+ init_tree(tree, (uint) MY_MIN(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0,
tree_key_length,
group_concat_key_cmp_with_order, NULL, (void*) this,
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 40a28d8beae..1c692014652 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -1073,7 +1073,7 @@ public:
enum Sumfunctype sum_func () const {return MIN_FUNC;}
bool add();
- const char *func_name() const { return "min("; }
+  const char *func_name() const { return "min("; }
Item *copy_or_same(THD* thd);
};
@@ -1086,7 +1086,7 @@ public:
enum Sumfunctype sum_func () const {return MAX_FUNC;}
bool add();
- const char *func_name() const { return "max("; }
+  const char *func_name() const { return "max("; }
Item *copy_or_same(THD* thd);
};
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 02a7b8511af..c3e8204fd37 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -146,14 +146,14 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
switch (*++ptr) {
/* Year */
case 'Y':
- tmp= (char*) val + min(4, val_len);
+ tmp= (char*) val + MY_MIN(4, val_len);
l_time->year= (int) my_strtoll10(val, &tmp, &error);
if ((int) (tmp-val) <= 2)
l_time->year= year_2000_handling(l_time->year);
val= tmp;
break;
case 'y':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->year= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
l_time->year= year_2000_handling(l_time->year);
@@ -162,7 +162,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Month */
case 'm':
case 'c':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->month= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -179,15 +179,15 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Day */
case 'd':
case 'e':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->day= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
case 'D':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->day= (int) my_strtoll10(val, &tmp, &error);
/* Skip 'st, 'nd, 'th .. */
- val= tmp + min((int) (val_end-tmp), 2);
+ val= tmp + MY_MIN((int) (val_end-tmp), 2);
break;
/* Hour */
@@ -198,14 +198,14 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* fall through */
case 'k':
case 'H':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->hour= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
/* Minute */
case 'i':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->minute= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -213,7 +213,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Second */
case 's':
case 'S':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->second= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -265,7 +265,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
val= tmp;
break;
case 'j':
- tmp= (char*) val + min(val_len, 3);
+ tmp= (char*) val + MY_MIN(val_len, 3);
yearday= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -277,7 +277,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
case 'u':
sunday_first_n_first_week_non_iso= (*ptr=='U' || *ptr== 'V');
strict_week_number= (*ptr=='V' || *ptr=='v');
- tmp= (char*) val + min(val_len, 2);
+ tmp= (char*) val + MY_MIN(val_len, 2);
if ((week_number= (int) my_strtoll10(val, &tmp, &error)) < 0 ||
(strict_week_number && !week_number) ||
week_number > 53)
@@ -289,7 +289,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
case 'X':
case 'x':
strict_week_number_year_type= (*ptr=='X');
- tmp= (char*) val + min(4, val_len);
+ tmp= (char*) val + MY_MIN(4, val_len);
strict_week_number_year= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -437,7 +437,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
err:
{
char buff[128];
- strmake(buff, val_begin, min(length, sizeof(buff)-1));
+ strmake(buff, val_begin, MY_MIN(length, sizeof(buff)-1));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WRONG_VALUE_FOR_TYPE, ER(ER_WRONG_VALUE_FOR_TYPE),
date_time_type, buff, "str_to_date");
@@ -1751,7 +1751,7 @@ void Item_func_date_format::fix_length_and_dec()
else
{
fixed_length=0;
- max_length=min(arg1->max_length, MAX_BLOB_WIDTH) * 10 *
+ max_length=MY_MIN(arg1->max_length, MAX_BLOB_WIDTH) * 10 *
collation.collation->mbmaxlen;
set_if_smaller(max_length,MAX_BLOB_WIDTH);
}
@@ -2525,7 +2525,7 @@ err:
void Item_func_add_time::fix_length_and_dec()
{
enum_field_types arg0_field_type;
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
/*
The field type for the result of an Item_func_add_time function is defined
diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h
index 3e3cd698efc..f25f4544e47 100644
--- a/sql/item_timefunc.h
+++ b/sql/item_timefunc.h
@@ -115,7 +115,7 @@ public:
{
int *input_version= (int*)int_arg;
/* This function was introduced in 5.5 */
- int output_version= max(*input_version, 50500);
+ int output_version= MY_MAX(*input_version, 50500);
*input_version= output_version;
return 0;
}
@@ -933,7 +933,7 @@ public:
const char *func_name() const { return "timediff"; }
void fix_length_and_dec()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
Item_timefunc::fix_length_and_dec();
}
bool get_date(MYSQL_TIME *ltime, ulonglong fuzzy_date);
diff --git a/sql/key.cc b/sql/key.cc
index dd7818119c8..ebf9259d469 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -132,7 +132,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
Don't copy data for null values
The -1 below is to subtract the null byte which is already handled
*/
- length= min(key_length, (uint) key_part->store_length-1);
+ length= MY_MIN(key_length, (uint) key_part->store_length-1);
if (with_zerofill)
bzero((char*) to_key, length);
continue;
@@ -142,7 +142,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
key_part->key_part_flag & HA_VAR_LENGTH_PART)
{
key_length-= HA_KEY_BLOB_LENGTH;
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
uint bytes= key_part->field->get_key_image(to_key, length, Field::itRAW);
if (with_zerofill && bytes < length)
bzero((char*) to_key + bytes, length - bytes);
@@ -150,7 +150,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
}
else
{
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
Field *field= key_part->field;
CHARSET_INFO *cs= field->charset();
uint bytes= field->get_key_image(to_key, length, Field::itRAW);
@@ -202,7 +202,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
Don't copy data for null bytes
The -1 below is to subtract the null byte which is already handled
*/
- length= min(key_length, (uint) key_part->store_length-1);
+ length= MY_MIN(key_length, (uint) key_part->store_length-1);
continue;
}
}
@@ -244,7 +244,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
my_ptrdiff_t ptrdiff= to_record - field->table->record[0];
field->move_field_offset(ptrdiff);
key_length-= HA_KEY_BLOB_LENGTH;
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
old_map= dbug_tmp_use_all_columns(field->table, field->table->write_set);
field->set_key_image(from_key, length);
dbug_tmp_restore_column_map(field->table->write_set, old_map);
@@ -253,7 +253,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
}
else
{
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
/* skip the byte with 'uneven' bits, if used */
memcpy(to_record + key_part->offset, from_key + used_uneven_bits
, (size_t) length - used_uneven_bits);
@@ -311,7 +311,7 @@ bool key_cmp_if_same(TABLE *table,const uchar *key,uint idx,uint key_length)
return 1;
continue;
}
- length= min((uint) (key_end-key), store_length);
+ length= MY_MIN((uint) (key_end-key), store_length);
if (!(key_part->key_type & (FIELDFLAG_NUMBER+FIELDFLAG_BINARY+
FIELDFLAG_PACK)))
{
@@ -403,7 +403,7 @@ void key_unpack(String *to,TABLE *table,uint idx)
tmp.length(charpos);
}
if (key_part->length < field->pack_length())
- tmp.length(min(tmp.length(),key_part->length));
+ tmp.length(MY_MIN(tmp.length(),key_part->length));
ErrConvString err(&tmp);
to->append(err.ptr());
}
@@ -558,8 +558,8 @@ int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec)
if (key_part->null_bit)
{
/* The key_part can contain NULL values */
- bool first_is_null= field->is_null_in_record_with_offset(first_diff);
- bool sec_is_null= field->is_null_in_record_with_offset(sec_diff);
+ bool first_is_null= field->is_real_null(first_diff);
+ bool sec_is_null= field->is_real_null(sec_diff);
/*
NULL is smaller then everything so if first is NULL and the other
not then we know that we should return -1 and for the opposite
diff --git a/sql/log.cc b/sql/log.cc
index 254449da05a..2572dc61894 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -781,8 +781,8 @@ bool Log_to_csv_event_handler::
Open_tables_backup open_tables_backup;
CHARSET_INFO *client_cs= thd->variables.character_set_client;
bool save_time_zone_used;
- long query_time= (long) min(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
- long lock_time= (long) min(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
+ long query_time= (long) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
+ long lock_time= (long) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
long query_time_micro= (long) (query_utime % 1000000);
long lock_time_micro= (long) (lock_utime % 1000000);
@@ -2925,7 +2925,7 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
{
char *p= fn_ext(log_name);
uint length= (uint) (p - log_name);
- strmake(buff, log_name, min(length, FN_REFLEN-1));
+ strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
return (const char*)buff;
}
return log_name;
@@ -6992,7 +6992,7 @@ static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
DBUG_ENTER("print_buffer_to_nt_eventlog");
/* Add ending CR/LF's to string, overwrite last chars if necessary */
- strmov(buffptr+min(length, buffLen-5), "\r\n\r\n");
+ strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
setup_windows_event_source();
if ((event= RegisterEventSource(NULL,"MySQL")))
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 7de72338d97..2dafd754293 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1347,7 +1347,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's
"minimal" over the set {MySQL >=4.0}).
*/
- uint header_size= min(description_event->common_header_len,
+ uint header_size= MY_MIN(description_event->common_header_len,
LOG_EVENT_MINIMAL_HEADER_LEN);
LOCK_MUTEX;
@@ -3090,7 +3090,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
be even bigger, but this will suffice to catch most corruption
errors that can lead to a crash.
*/
- if (status_vars_len > min(data_len, MAX_SIZE_LOG_EVENT_STATUS))
+ if (status_vars_len > MY_MIN(data_len, MAX_SIZE_LOG_EVENT_STATUS))
{
DBUG_PRINT("info", ("status_vars_len (%u) > data_len (%lu); query= 0",
status_vars_len, data_len));
@@ -6602,7 +6602,7 @@ bool User_var_log_event::write(IO_CACHE* file)
char buf[UV_NAME_LEN_SIZE];
char buf1[UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE];
- uchar buf2[max(8, DECIMAL_MAX_FIELD_SIZE + 2)], *pos= buf2;
+ uchar buf2[MY_MAX(8, DECIMAL_MAX_FIELD_SIZE + 2)], *pos= buf2;
uint unsigned_len= 0;
uint buf1_length;
ulong event_length;
@@ -8371,7 +8371,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
trigger false warnings.
*/
#ifndef HAVE_valgrind
- DBUG_DUMP("row_data", row_data, min(length, 32));
+ DBUG_DUMP("row_data", row_data, MY_MIN(length, 32));
#endif
DBUG_ASSERT(m_rows_buf <= m_rows_cur);
@@ -9453,7 +9453,7 @@ int Table_map_log_event::rewrite_db(const char* new_db, size_t new_len,
DBUG_ENTER("Table_map_log_event::rewrite_db");
DBUG_ASSERT(temp_buf);
- uint header_len= min(desc->common_header_len,
+ uint header_len= MY_MIN(desc->common_header_len,
LOG_EVENT_MINIMAL_HEADER_LEN) + TABLE_MAP_HEADER_LEN;
int len_diff;
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index e9afe474418..566a367430c 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -1406,7 +1406,7 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length)
trigger false warnings.
*/
#ifndef HAVE_valgrind
- DBUG_DUMP("row_data", row_data, min(length, 32));
+ DBUG_DUMP("row_data", row_data, MY_MIN(length, 32));
#endif
DBUG_ASSERT(m_rows_buf <= m_rows_cur);
diff --git a/sql/mdl.h b/sql/mdl.h
index 477f4df7807..c778dbbc1d7 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -288,7 +288,7 @@ public:
character set is utf-8, we can safely assume that no
character starts with a zero byte.
*/
- return memcmp(m_ptr, rhs->m_ptr, min(m_length, rhs->m_length));
+ return memcmp(m_ptr, rhs->m_ptr, MY_MIN(m_length, rhs->m_length));
}
MDL_key(const MDL_key *rhs)
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index e6cbed7eb13..04557a636d5 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1647,7 +1647,7 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
used_str= rowid_ordered;
uint used_str_len= strlen(used_str);
- uint copy_len= min(used_str_len, size);
+ uint copy_len= MY_MIN(used_str_len, size);
memcpy(str, used_str, size);
return copy_len;
}
@@ -1708,7 +1708,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
else
{
cost->reset();
- *buffer_size= max(*buffer_size,
+ *buffer_size= MY_MAX(*buffer_size,
(size_t)(1.2*rows_in_last_step) * elem_size +
primary_file->ref_length + table->key_info[keynr].key_length);
}
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 2607297f2c9..8e8414a7acc 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -3915,7 +3915,7 @@ static int init_common_variables()
can't get max_connections*5 but still got no less than was
requested (value of wanted_files).
*/
- max_open_files= max(max(wanted_files,
+ max_open_files= MY_MAX(MY_MAX(wanted_files,
(max_connections + extra_max_connections)*5),
open_files_limit);
files= my_set_max_open_files(max_open_files);
@@ -3928,15 +3928,15 @@ static int init_common_variables()
If we have requested too much file handles than we bring
max_connections in supported bounds.
*/
- max_connections= (ulong) min(files-10-TABLE_OPEN_CACHE_MIN*2,
+ max_connections= (ulong) MY_MIN(files-10-TABLE_OPEN_CACHE_MIN*2,
max_connections);
/*
Decrease table_cache_size according to max_connections, but
- not below TABLE_OPEN_CACHE_MIN. Outer min() ensures that we
+ not below TABLE_OPEN_CACHE_MIN. Outer MY_MIN() ensures that we
never increase table_cache_size automatically (that could
happen if max_connections is decreased above).
*/
- table_cache_size= (ulong) min(max((files-10-max_connections)/2,
+ table_cache_size= (ulong) MY_MIN(MY_MAX((files-10-max_connections)/2,
TABLE_OPEN_CACHE_MIN),
table_cache_size);
DBUG_PRINT("warning",
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 716423f9bd2..67c9f4e68ba 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -24,6 +24,7 @@
#include "mysql/psi/mysql_file.h" /* MYSQL_FILE */
#include "sql_list.h" /* I_List */
#include "sql_cmd.h"
+#include <my_rnd.h>
class THD;
struct handlerton;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index b6890ab9fda..a9e3af13403 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -792,7 +792,7 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
{
while (remain > 0)
{
- size_t length= min(remain, net->max_packet);
+ size_t length= MY_MIN(remain, net->max_packet);
if (net_safe_read(net, net->buff, length, alarmed))
DBUG_RETURN(1);
update_statistics(thd_increment_bytes_received(length));
@@ -989,7 +989,7 @@ my_real_read(NET *net, size_t *complen)
len=uint3korr(net->buff+net->where_b);
if (!len) /* End of big multi-packet */
goto end;
- helping = max(len,*complen) + net->where_b;
+ helping = MY_MAX(len,*complen) + net->where_b;
/* The necessary size of net->buff */
if (helping >= net->max_packet)
{
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 2205d2fcab4..e3bca89d6df 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3079,7 +3079,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
group_trp= get_best_group_min_max(&param, tree, best_read_time);
if (group_trp)
{
- param.table->quick_condition_rows= min(group_trp->records,
+ param.table->quick_condition_rows= MY_MIN(group_trp->records,
head->stat_records());
if (group_trp->read_cost < best_read_time)
{
@@ -4757,7 +4757,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
{
imerge_trp->read_cost= imerge_cost;
imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
- imerge_trp->records= min(imerge_trp->records,
+ imerge_trp->records= MY_MIN(imerge_trp->records,
param->table->stat_records());
imerge_trp->range_scans= range_scans;
imerge_trp->range_scans_end= range_scans + n_child_scans;
@@ -5345,7 +5345,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
this number by #r.
If we do not make any assumptions then we can only state that
- #r<=min(#r1,#r2).
+ #r<=MY_MIN(#r1,#r2).
With this estimate we can't say that the index intersection scan will be
cheaper than the cheapest index scan.
@@ -5378,7 +5378,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
#rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2.
The current code does not make an estimate either for #rt1_0, or for #rt2_0,
but it can be adjusted to provide those numbers.
- Alternatively, min(rec_per_key) for (dept) could be used to get an upper
+ Alternatively, MY_MIN(rec_per_key) for (dept) could be used to get an upper
bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
now.
@@ -5389,7 +5389,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm5')*sel(first_name='Robert')
=sel(Rt2)*sel(dept=5)
- Here max(rec_per_key) for (dept) could be used to get an upper bound for
+ Here MY_MAX(rec_per_key) for (dept) could be used to get an upper bound for
the value of sel(Rt1&Rt2).
When the intersected indexes have different major columns, but some
@@ -5442,9 +5442,9 @@ bool prepare_search_best_index_intersect(PARAM *param,
f_1 = rec_per_key[first_name]/rec_per_key[last_name].
The the number of records in the range tree:
Rt_0: (first_name='Robert' OR first_name='Bob')
- for the sub-index (first_name) is not greater than max(#r*f_1, #t).
+ for the sub-index (first_name) is not greater than MY_MAX(#r*f_1, #t).
Strictly speaking, we can state only that it's not greater than
- max(#r*max_f_1, #t), where
+ MY_MAX(#r*max_f_1, #t), where
max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
Yet, if #r/#t is big enough (and this is the case of an index intersection,
because using this index range with a single index scan is cheaper than
@@ -8641,7 +8641,7 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
if (!key1)
return &null_element; // Impossible ranges
key1->use_count++;
- key1->max_part_no= max(key2->max_part_no, key2->part+1);
+ key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
return key1;
}
@@ -8734,7 +8734,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
key1->use_count--;
key2->use_count--;
SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
- uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+ uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
while (e1 && e2)
{
@@ -8932,7 +8932,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
b: [----
*/
- uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+ uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
for (key2=key2->first(); key2; )
{
@@ -9142,11 +9142,11 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
are merged into one range by deleting first...last-1 from
the key1 tree. In the figure, this applies to first and the
two consecutive ranges. The range of last is then extended:
- * last.min: Set to min(key2.min, first.min)
+ * last.min: Set to MY_MIN(key2.min, first.min)
* last.max: If there is a last->next that overlaps key2 (i.e.,
last->next has a different next_key_part):
Set adjacent to last->next.min
- Otherwise: Set to max(key2.max, last.max)
+ Otherwise: Set to MY_MAX(key2.max, last.max)
Result:
key2: [****----------------------*******]
@@ -9200,7 +9200,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
^ ^
last different next_key_part
- Extend range of last up to max(last.max, key2.max):
+ Extend range of last up to MY_MAX(last.max, key2.max):
key2: [--------*****]
key1: [***----------**] [xxxx]
*/
@@ -10041,7 +10041,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
param->table->quick_key_parts[keynr]= param->max_key_part+1;
param->table->quick_n_ranges[keynr]= param->range_count;
param->table->quick_condition_rows=
- min(param->table->quick_condition_rows, rows);
+ MY_MIN(param->table->quick_condition_rows, rows);
param->table->quick_rows[keynr]= rows;
}
}
@@ -11814,7 +11814,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
TODO
- What happens if the query groups by the MIN/MAX field, and there is no
- other field as in: "select min(a) from t1 group by a" ?
+ other field as in: "select MY_MIN(a) from t1 group by a" ?
- We assume that the general correctness of the GROUP-BY query was checked
before this point. Is this correct, or do we have to check it completely?
- Lift the limitation in condition (B3), that is, make this access method
@@ -12075,7 +12075,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
cur_group_prefix_len+= cur_part->store_length;
used_key_parts_map.set_bit(key_part_nr);
++cur_group_key_parts;
- max_key_part= max(max_key_part,key_part_nr);
+ max_key_part= MY_MAX(max_key_part,key_part_nr);
}
/*
Check that used key parts forms a prefix of the index.
@@ -12741,9 +12741,9 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
{
double blocks_per_group= (double) num_blocks / (double) num_groups;
p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
- p_overlap= min(p_overlap, 1.0);
+ p_overlap= MY_MIN(p_overlap, 1.0);
}
- io_cost= (double) min(num_groups * (1 + p_overlap), num_blocks);
+ io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
}
else
io_cost= (keys_per_group > keys_per_block) ?
diff --git a/sql/opt_range.h b/sql/opt_range.h
index c59b2a7eb02..fd9d0b3923f 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -104,7 +104,7 @@ class QUICK_RANGE :public Sql_alloc {
void make_min_endpoint(key_range *kr, uint prefix_length,
key_part_map keypart_map) {
make_min_endpoint(kr);
- kr->length= min(kr->length, prefix_length);
+ kr->length= MY_MIN(kr->length, prefix_length);
kr->keypart_map&= keypart_map;
}
@@ -142,7 +142,7 @@ class QUICK_RANGE :public Sql_alloc {
void make_max_endpoint(key_range *kr, uint prefix_length,
key_part_map keypart_map) {
make_max_endpoint(kr);
- kr->length= min(kr->length, prefix_length);
+ kr->length= MY_MIN(kr->length, prefix_length);
kr->keypart_map&= keypart_map;
}
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index 1f4e36178db..e03a1e2e644 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -293,7 +293,7 @@ walk_up_n_right:
}
}
seq->param->range_count++;
- seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+ seq->param->max_key_part=MY_MAX(seq->param->max_key_part,key_tree->part);
return 0;
}
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 8cd4ba08ff3..7780a7921e5 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2175,7 +2175,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
double rows= 1.0;
while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
rows *= join->map2table[tableno]->table->quick_condition_rows;
- sjm->rows= min(sjm->rows, rows);
+ sjm->rows= MY_MIN(sjm->rows, rows);
}
memcpy(sjm->positions, join->best_positions + join->const_tables,
sizeof(POSITION) * n_tables);
@@ -4041,7 +4041,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
share->max_rows= ~(ha_rows) 0;
else
share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
- min(thd->variables.tmp_table_size,
+ MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size) :
thd->variables.tmp_table_size) /
share->reclength);
@@ -5153,7 +5153,7 @@ bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,
0 or 1 record. Examples of both cases:
select * from ot where col in (select ... from it where 2>3)
- select * from ot where col in (select min(it.key) from it)
+ select * from ot where col in (select MY_MIN(it.key) from it)
in this case, the subquery predicate has not been setup for
materialization. In particular, there is no materialized temp.table.
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index 33164c1ed12..e44b6fdf5e0 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -328,7 +328,7 @@ const size_t Dep_value_table::iterator_size=
ALIGN_SIZE(sizeof(Dep_value_table::Module_iter));
const size_t Dep_value::iterator_size=
- max(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
+ MY_MAX(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
/*
@@ -441,7 +441,7 @@ const size_t Dep_module_key::iterator_size=
ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));
const size_t Dep_module::iterator_size=
- max(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
+ MY_MAX(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
/*
diff --git a/sql/password.c b/sql/password.c
index 947620ddf7a..954daf2d8d1 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -60,12 +60,14 @@
*****************************************************************************/
-#include <password.h>
#include <my_global.h>
#include <my_sys.h>
#include <m_string.h>
+#include <password.h>
+#include <mysql.h>
+#include <my_rnd.h>
#include <sha1.h>
-#include "mysql.h"
+#include <crypt_genhash_impl.h>
/************ MySQL 3.23-4.0 authentication routines: untouched ***********/
@@ -372,6 +374,47 @@ my_crypt(char *to, const uchar *s1, const uchar *s2, uint len)
}
+#if defined(HAVE_OPENSSL)
+void my_make_scrambled_password(char *to, const char *password,
+ size_t pass_len)
+{
+
+ char salt[CRYPT_SALT_LENGTH + 1];
+
+ generate_user_salt(salt, CRYPT_SALT_LENGTH + 1);
+ my_crypt_genhash(to,
+ CRYPT_MAX_PASSWORD_SIZE,
+ password,
+ pass_len,
+ salt,
+ 0);
+
+}
+#endif
+/**
+ Compute two stage SHA1 hash of the password :
+
+ hash_stage1=sha1("password")
+ hash_stage2=sha1(hash_stage1)
+
+ @param password [IN] Password string.
+ @param pass_len [IN] Length of the password.
+ @param hash_stage1 [OUT] sha1(password)
+ @param hash_stage2 [OUT] sha1(hash_stage1)
+*/
+
+inline static
+void compute_two_stage_sha1_hash(const char *password, size_t pass_len,
+ uint8 *hash_stage1, uint8 *hash_stage2)
+{
+ /* Stage 1: hash password */
+ compute_sha1_hash(hash_stage1, password, pass_len);
+
+ /* Stage 2 : hash first stage's output. */
+ compute_sha1_hash(hash_stage2, (const char *) hash_stage1, SHA1_HASH_SIZE);
+}
+
+
/*
MySQL 4.1.1 password hashing: SHA conversion (see RFC 2289, 3174) twice
applied to the password string, and then produced octet sequence is
@@ -379,27 +422,20 @@ my_crypt(char *to, const uchar *s1, const uchar *s2, uint len)
The result of this function is used as return value from PASSWORD() and
is stored in the database.
SYNOPSIS
- my_make_scrambled_password()
+ my_make_scrambled_password_sha1()
buf OUT buffer of size 2*SHA1_HASH_SIZE + 2 to store hex string
password IN password string
pass_len IN length of password string
*/
-void my_make_scrambled_password(char *to, const char *password,
- size_t pass_len)
+void my_make_scrambled_password_sha1(char *to, const char *password,
+ size_t pass_len)
{
- SHA1_CONTEXT sha1_context;
uint8 hash_stage2[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
- /* stage 1: hash password */
- mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) pass_len);
- mysql_sha1_result(&sha1_context, (uint8 *) to);
- /* stage 2: hash stage1 output */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, (uint8 *) to, SHA1_HASH_SIZE);
- /* separate buffer is used to pass 'to' in octet2hex */
- mysql_sha1_result(&sha1_context, hash_stage2);
+ /* Two stage SHA1 hash of the password. */
+ compute_two_stage_sha1_hash(password, pass_len, (uint8 *) to, hash_stage2);
+
/* convert hash_stage2 to hex string */
*to++= PVERSION41_CHAR;
octet2hex(to, (const char*) hash_stage2, SHA1_HASH_SIZE);
@@ -419,7 +455,7 @@ void my_make_scrambled_password(char *to, const char *password,
void make_scrambled_password(char *to, const char *password)
{
- my_make_scrambled_password(to, password, strlen(password));
+ my_make_scrambled_password_sha1(to, password, strlen(password));
}
@@ -443,24 +479,16 @@ void make_scrambled_password(char *to, const char *password)
void
scramble(char *to, const char *message, const char *password)
{
- SHA1_CONTEXT sha1_context;
uint8 hash_stage1[SHA1_HASH_SIZE];
uint8 hash_stage2[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
- /* stage 1: hash password */
- mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) strlen(password));
- mysql_sha1_result(&sha1_context, hash_stage1);
- /* stage 2: hash stage 1; note that hash_stage2 is stored in the database */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, hash_stage1, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, hash_stage2);
+ /* Two stage SHA1 hash of the password. */
+ compute_two_stage_sha1_hash(password, strlen(password), hash_stage1,
+ hash_stage2);
+
/* create crypt string as sha1(message, hash_stage2) */;
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, (const uint8 *) message, SCRAMBLE_LENGTH);
- mysql_sha1_input(&sha1_context, hash_stage2, SHA1_HASH_SIZE);
- /* xor allows 'from' and 'to' overlap: lets take advantage of it */
- mysql_sha1_result(&sha1_context, (uint8 *) to);
+ compute_sha1_hash_multi((uint8 *) to, message, SCRAMBLE_LENGTH,
+ (const char *) hash_stage2, SHA1_HASH_SIZE);
my_crypt(to, (const uchar *) to, hash_stage1, SCRAMBLE_LENGTH);
}
@@ -472,7 +500,7 @@ scramble(char *to, const char *message, const char *password)
null-terminated, reply and hash_stage2 must be at least SHA1_HASH_SIZE
long (if not, something fishy is going on).
SYNOPSIS
- check_scramble()
+ check_scramble_sha1()
scramble clients' reply, presumably produced by scramble()
message original random string, previously sent to client
(presumably second argument of scramble()), must be
@@ -486,27 +514,30 @@ scramble(char *to, const char *message, const char *password)
*/
my_bool
-check_scramble(const uchar *scramble_arg, const char *message,
- const uint8 *hash_stage2)
+check_scramble_sha1(const uchar *scramble_arg, const char *message,
+ const uint8 *hash_stage2)
{
- SHA1_CONTEXT sha1_context;
uint8 buf[SHA1_HASH_SIZE];
uint8 hash_stage2_reassured[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
/* create key to encrypt scramble */
- mysql_sha1_input(&sha1_context, (const uint8 *) message, SCRAMBLE_LENGTH);
- mysql_sha1_input(&sha1_context, hash_stage2, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, buf);
+ compute_sha1_hash_multi(buf, message, SCRAMBLE_LENGTH,
+ (const char *) hash_stage2, SHA1_HASH_SIZE);
/* encrypt scramble */
- my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH);
+ my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH);
+
/* now buf supposedly contains hash_stage1: so we can get hash_stage2 */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, buf, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, hash_stage2_reassured);
+ compute_sha1_hash(hash_stage2_reassured, (const char *) buf, SHA1_HASH_SIZE);
+
return test(memcmp(hash_stage2, hash_stage2_reassured, SHA1_HASH_SIZE));
}
+my_bool
+check_scramble(const uchar *scramble_arg, const char *message,
+ const uint8 *hash_stage2)
+{
+ return check_scramble_sha1(scramble_arg, message, hash_stage2);
+}
/*
Convert scrambled password from asciiz hex string to binary form.
diff --git a/sql/protocol.cc b/sql/protocol.cc
index f6e9e9e62e1..bc3133881af 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -233,7 +233,7 @@ net_send_ok(THD *thd,
pos+=2;
/* We can only return up to 65535 warnings in two bytes */
- uint tmp= min(statement_warn_count, 65535);
+ uint tmp= MY_MIN(statement_warn_count, 65535);
int2store(pos, tmp);
pos+= 2;
}
@@ -329,7 +329,7 @@ static bool write_eof_packet(THD *thd, NET *net,
Don't send warn count during SP execution, as the warn_list
is cleared between substatements, and mysqltest gets confused
*/
- uint tmp= min(statement_warn_count, 65535);
+ uint tmp= MY_MIN(statement_warn_count, 65535);
buff[0]= 254;
int2store(buff+1, tmp);
/*
@@ -606,17 +606,17 @@ void net_send_progress_packet(THD *thd)
*pos++= (uchar) 1; // Number of strings
*pos++= (uchar) thd->progress.stage + 1;
/*
- We have the max() here to avoid problems if max_stage is not set,
+ We have the MY_MAX() here to avoid problems if max_stage is not set,
which may happen during automatic repair of table
*/
- *pos++= (uchar) max(thd->progress.max_stage, thd->progress.stage + 1);
+ *pos++= (uchar) MY_MAX(thd->progress.max_stage, thd->progress.stage + 1);
progress= 0;
if (thd->progress.max_counter)
progress= 100000ULL * thd->progress.counter / thd->progress.max_counter;
int3store(pos, progress); // Between 0 & 100000
pos+= 3;
pos= net_store_data(pos, (const uchar*) proc_info,
- min(length, sizeof(buff)-7));
+ MY_MIN(length, sizeof(buff)-7));
net_write_command(&thd->net, (uchar) 255, progress_header,
sizeof(progress_header), (uchar*) buff,
(uint) (pos - buff));
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 3e02b555dc0..c0393300fcf 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -151,7 +151,7 @@ void init_master_log_pos(Master_info* mi)
if CHANGE MASTER did not specify it. (no data loss in conversion
as hb period has a max)
*/
- mi->heartbeat_period= (float) min(SLAVE_MAX_HEARTBEAT_PERIOD,
+ mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD,
(slave_net_timeout/2.0));
DBUG_ASSERT(mi->heartbeat_period > (float) 0.001
|| mi->heartbeat_period == 0);
@@ -702,7 +702,7 @@ void create_logfile_name_with_suffix(char *res_file_name, uint length,
length-= (suffix->length - ext_pos); /* Leave place for extension */
p= res_file_name + ext_pos;
*p++= '-'; /* Add separator */
- p= strmake(p, res, min((size_t) (length - (p - res_file_name)),
+ p= strmake(p, res, MY_MIN((size_t) (length - (p - res_file_name)),
res_length));
/* Add back extension. We have checked above that there is space for it */
strmov(p, ext);
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index 99bf8a82004..e219177cd80 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -362,7 +362,7 @@ unpack_row(Relay_log_info const *rli,
/*
throw away master's extra fields
*/
- uint max_cols= min(tabledef->size(), cols->n_bits);
+ uint max_cols= MY_MIN(tabledef->size(), cols->n_bits);
for (; i < max_cols; i++)
{
if (bitmap_is_set(cols, i))
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 2e74acc0345..5a9e342e458 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -735,7 +735,7 @@ int Relay_log_info::wait_for_pos(THD* thd, String* log_name,
ulong log_name_extension;
char log_name_tmp[FN_REFLEN]; //make a char[] from String
- strmake(log_name_tmp, log_name->ptr(), min(log_name->length(), FN_REFLEN-1));
+ strmake(log_name_tmp, log_name->ptr(), MY_MIN(log_name->length(), FN_REFLEN-1));
char *p= fn_ext(log_name_tmp);
char *p_end;
@@ -745,7 +745,7 @@ int Relay_log_info::wait_for_pos(THD* thd, String* log_name,
goto err;
}
// Convert 0-3 to 4
- log_pos= max(log_pos, BIN_LOG_HEADER_SIZE);
+ log_pos= MY_MAX(log_pos, BIN_LOG_HEADER_SIZE);
/* p points to '.' */
log_name_extension= strtoul(++p, &p_end, 10);
/*
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc
index 1b9e744bcc1..33e04e488cb 100644
--- a/sql/rpl_utility.cc
+++ b/sql/rpl_utility.cc
@@ -779,7 +779,7 @@ table_def::compatible_with(THD *thd, Relay_log_info *rli,
/*
We only check the initial columns for the tables.
*/
- uint const cols_to_check= min(table->s->fields, size());
+ uint const cols_to_check= MY_MIN(table->s->fields, size());
TABLE *tmp_table= NULL;
for (uint col= 0 ; col < cols_to_check ; ++col)
diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h
index 79f4517c492..19a5f621f69 100644
--- a/sql/rpl_utility.h
+++ b/sql/rpl_utility.h
@@ -287,7 +287,7 @@ public:
do { \
char buf[256]; \
uint i; \
- for (i = 0 ; i < min(sizeof(buf) - 1, (BS)->n_bits) ; i++) \
+ for (i = 0 ; i < MY_MIN(sizeof(buf) - 1, (BS)->n_bits) ; i++) \
buf[i] = bitmap_is_set((BS), i) ? '1' : '0'; \
buf[i] = '\0'; \
DBUG_PRINT((N), ((FRM), buf)); \
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 49f35719a77..156ec759099 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -1,4 +1,4 @@
-languages czech=cze latin2, danish=dan latin1, dutch=nla latin1, english=eng latin1, estonian=est latin7, french=fre latin1, german=ger latin1, greek=greek greek, hungarian=hun latin2, italian=ita latin1, japanese=jpn ujis, japanese-sjis=jps sjis, korean=kor euckr, norwegian-ny=norwegian-ny latin1, norwegian=nor latin1, polish=pol latin2, portuguese=por latin1, romanian=rum latin2, russian=rus koi8r, serbian=serbian cp1250, slovak=slo latin2, spanish=spa latin1, swedish=swe latin1, ukrainian=ukr koi8u;
+languages czech=cze latin2, danish=dan latin1, dutch=nla latin1, english=eng latin1, estonian=est latin7, french=fre latin1, german=ger latin1, greek=greek greek, hungarian=hun latin2, italian=ita latin1, japanese=jpn ujis, korean=kor euckr, norwegian-ny=norwegian-ny latin1, norwegian=nor latin1, polish=pol latin2, portuguese=por latin1, romanian=rum latin2, russian=rus koi8r, serbian=serbian cp1250, slovak=slo latin2, spanish=spa latin1, swedish=swe latin1, ukrainian=ukr koi8u, bulgarian=bgn cp1251;
default-language eng
@@ -51,7 +51,7 @@ ER_YES
spa "SI"
ukr "ТАК"
ER_CANT_CREATE_FILE
- cze "Nemohu vytvo-Břit soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu vytvořit soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke oprette filen '%-.200s' (Fejlkode: %M)"
nla "Kan file '%-.200s' niet aanmaken (Errcode: %M)"
eng "Can't create file '%-.200s' (errno: %M)"
@@ -61,7 +61,7 @@ ER_CANT_CREATE_FILE
greek "Αδύνατη η δημιουργία του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' file nem hozhato letre (hibakod: %M)"
ita "Impossibile creare il file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルが作れません (errno: %M)"
+ jpn "ファイル '%-.200s' を作成できません。(エラー番号: %M)"
kor "화일 '%-.200s'를 만들지 못했습니다. (에러번호: %M)"
nor "Kan ikke opprette fila '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette fila '%-.200s' (Feilkode: %M)"
@@ -75,18 +75,17 @@ ER_CANT_CREATE_FILE
swe "Kan inte skapa filen '%-.200s' (Felkod: %M)"
ukr "Не можу створити файл '%-.200s' (помилка: %M)"
ER_CANT_CREATE_TABLE
- cze "Nemohu vytvo-Břit tabulku '%-.200s' (chybový kód: %M)"
+ cze "Nemohu vytvořit tabulku '%-.200s' (chybový kód: %M)"
dan "Kan ikke oprette tabellen '%-.200s' (Fejlkode: %M)"
nla "Kan tabel '%-.200s' niet aanmaken (Errcode: %M)"
eng "Can't create table '%-.200s' (errno: %M)"
- jps "'%-.200s' テーブルが作れません.(errno: %M)",
est "Ei suuda luua tabelit '%-.200s' (veakood: %M)"
fre "Ne peut créer la table '%-.200s' (Errcode: %M)"
ger "Kann Tabelle '%-.200s' nicht erzeugen (Fehler: %M)"
greek "Αδύνατη η δημιουργία του πίνακα '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' tabla nem hozhato letre (hibakod: %M)"
ita "Impossibile creare la tabella '%-.200s' (errno: %M)"
- jpn "'%-.200s' テーブルが作れません.(errno: %M)"
+ jpn "表 '%-.200s' を作成できません。(エラー番号: %M)"
kor "테이블 '%-.200s'를 만들지 못했습니다. (에러번호: %M)"
nor "Kan ikke opprette tabellen '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette tabellen '%-.200s' (Feilkode: %M)"
@@ -100,18 +99,17 @@ ER_CANT_CREATE_TABLE
swe "Kan inte skapa tabellen '%-.200s' (Felkod: %M)"
ukr "Не можу створити таблицю '%-.200s' (помилка: %M)"
ER_CANT_CREATE_DB
- cze "Nemohu vytvo-Břit databázi '%-.192s' (chybový kód: %M)"
+ cze "Nemohu vytvořit databázi '%-.192s' (chybový kód: %M)"
dan "Kan ikke oprette databasen '%-.192s' (Fejlkode: %M)"
nla "Kan database '%-.192s' niet aanmaken (Errcode: %M)"
eng "Can't create database '%-.192s' (errno: %M)"
- jps "'%-.192s' データベースが作れません (errno: %M)",
est "Ei suuda luua andmebaasi '%-.192s' (veakood: %M)"
fre "Ne peut créer la base '%-.192s' (Erreur %M)"
ger "Kann Datenbank '%-.192s' nicht erzeugen (Fehler: %M)"
greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s' (κωδικός λάθους: %M)"
hun "Az '%-.192s' adatbazis nem hozhato letre (hibakod: %M)"
ita "Impossibile creare il database '%-.192s' (errno: %M)"
- jpn "'%-.192s' データベースが作れません (errno: %M)"
+ jpn "データベース '%-.192s' を作成できません。(エラー番号: %M)"
kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. (에러번호: %M)"
nor "Kan ikke opprette databasen '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette databasen '%-.192s' (Feilkode: %M)"
@@ -125,18 +123,17 @@ ER_CANT_CREATE_DB
swe "Kan inte skapa databasen '%-.192s' (Felkod: %M)"
ukr "Не можу створити базу данних '%-.192s' (помилка: %M)"
ER_DB_CREATE_EXISTS
- cze "Nemohu vytvo-Břit databázi '%-.192s'; databáze již existuje"
+ cze "Nemohu vytvořit databázi '%-.192s'; databáze již existuje"
dan "Kan ikke oprette databasen '%-.192s'; databasen eksisterer"
nla "Kan database '%-.192s' niet aanmaken; database bestaat reeds"
eng "Can't create database '%-.192s'; database exists"
- jps "'%-.192s' データベースが作れません.既にそのデータベースが存在します",
est "Ei suuda luua andmebaasi '%-.192s': andmebaas juba eksisteerib"
fre "Ne peut créer la base '%-.192s'; elle existe déjà"
ger "Kann Datenbank '%-.192s' nicht erzeugen. Datenbank existiert bereits"
greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s'; Η βάση δεδομένων υπάρχει ήδη"
hun "Az '%-.192s' adatbazis nem hozhato letre Az adatbazis mar letezik"
ita "Impossibile creare il database '%-.192s'; il database esiste"
- jpn "'%-.192s' データベースが作れません.既にそのデータベースが存在します"
+ jpn "データベース '%-.192s' を作成できません。データベースはすでに存在します。"
kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. 데이타베이스가 존재함"
nor "Kan ikke opprette databasen '%-.192s'; databasen eksisterer"
norwegian-ny "Kan ikkje opprette databasen '%-.192s'; databasen eksisterer"
@@ -150,18 +147,17 @@ ER_DB_CREATE_EXISTS
swe "Databasen '%-.192s' existerar redan"
ukr "Не можу створити базу данних '%-.192s'. База данних існує"
ER_DB_DROP_EXISTS
- cze "Nemohu zru-Bšit databázi '%-.192s', databáze neexistuje"
+ cze "Nemohu zrušit databázi '%-.192s', databáze neexistuje"
dan "Kan ikke slette (droppe) '%-.192s'; databasen eksisterer ikke"
nla "Kan database '%-.192s' niet verwijderen; database bestaat niet"
eng "Can't drop database '%-.192s'; database doesn't exist"
- jps "'%-.192s' データベースを破棄できません. そのデータベースがないのです.",
est "Ei suuda kustutada andmebaasi '%-.192s': andmebaasi ei eksisteeri"
fre "Ne peut effacer la base '%-.192s'; elle n'existe pas"
ger "Kann Datenbank '%-.192s' nicht löschen; Datenbank nicht vorhanden"
greek "Αδύνατη η διαγραφή της βάσης δεδομένων '%-.192s'. Η βάση δεδομένων δεν υπάρχει"
hun "A(z) '%-.192s' adatbazis nem szuntetheto meg. Az adatbazis nem letezik"
ita "Impossibile cancellare '%-.192s'; il database non esiste"
- jpn "'%-.192s' データベースを破棄できません. そのデータベースがないのです."
+ jpn "データベース '%-.192s' を削除できません。データベースは存在しません。"
kor "데이타베이스 '%-.192s'를 제거하지 못했습니다. 데이타베이스가 존재하지 않음 "
nor "Kan ikke fjerne (drop) '%-.192s'; databasen eksisterer ikke"
norwegian-ny "Kan ikkje fjerne (drop) '%-.192s'; databasen eksisterer ikkje"
@@ -175,18 +171,17 @@ ER_DB_DROP_EXISTS
swe "Kan inte radera databasen '%-.192s'; databasen finns inte"
ukr "Не можу видалити базу данних '%-.192s'. База данних не існує"
ER_DB_DROP_DELETE
- cze "Chyba p-Bři rušení databáze (nemohu vymazat '%-.192s', chyba %M)"
+ cze "Chyba při rušení databáze (nemohu vymazat '%-.192s', chyba %M)"
dan "Fejl ved sletning (drop) af databasen (kan ikke slette '%-.192s', Fejlkode %M)"
nla "Fout bij verwijderen database (kan '%-.192s' niet verwijderen, Errcode: %M)"
eng "Error dropping database (can't delete '%-.192s', errno: %M)"
- jps "データベース破棄エラー ('%-.192s' を削除できません, errno: %M)",
est "Viga andmebaasi kustutamisel (ei suuda kustutada faili '%-.192s', veakood: %M)"
fre "Ne peut effacer la base '%-.192s' (erreur %M)"
ger "Fehler beim Löschen der Datenbank ('%-.192s' kann nicht gelöscht werden, Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή '%-.192s', κωδικός λάθους: %M)"
hun "Adatbazis megszuntetesi hiba ('%-.192s' nem torolheto, hibakod: %M)"
ita "Errore durante la cancellazione del database (impossibile cancellare '%-.192s', errno: %M)"
- jpn "データベース破棄エラー ('%-.192s' を削除できません, errno: %M)"
+ jpn "データベース削除エラー ('%-.192s' を削除できません。エラー番号: %M)"
kor "데이타베이스 제거 에러('%-.192s'를 삭제할 수 없읍니다, 에러번호: %M)"
nor "Feil ved fjerning (drop) av databasen (kan ikke slette '%-.192s', feil %M)"
norwegian-ny "Feil ved fjerning (drop) av databasen (kan ikkje slette '%-.192s', feil %M)"
@@ -200,18 +195,17 @@ ER_DB_DROP_DELETE
swe "Fel vid radering av databasen (Kan inte radera '%-.192s'. Felkod: %M)"
ukr "Не можу видалити базу данних (Не можу видалити '%-.192s', помилка: %M)"
ER_DB_DROP_RMDIR
- cze "Chyba p-Bři rušení databáze (nemohu vymazat adresář '%-.192s', chyba %M)"
+ cze "Chyba při rušení databáze (nemohu vymazat adresář '%-.192s', chyba %M)"
dan "Fejl ved sletting af database (kan ikke slette folderen '%-.192s', Fejlkode %M)"
nla "Fout bij verwijderen database (kan rmdir '%-.192s' niet uitvoeren, Errcode: %M)"
eng "Error dropping database (can't rmdir '%-.192s', errno: %M)"
- jps "データベース破棄エラー ('%-.192s' を rmdir できません, errno: %M)",
est "Viga andmebaasi kustutamisel (ei suuda kustutada kataloogi '%-.192s', veakood: %M)"
fre "Erreur en effaçant la base (rmdir '%-.192s', erreur %M)"
ger "Fehler beim Löschen der Datenbank (Verzeichnis '%-.192s' kann nicht gelöscht werden, Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή του φακέλλου '%-.192s', κωδικός λάθους: %M)"
hun "Adatbazis megszuntetesi hiba ('%-.192s' nem szuntetheto meg, hibakod: %M)"
ita "Errore durante la cancellazione del database (impossibile rmdir '%-.192s', errno: %M)"
- jpn "データベース破棄エラー ('%-.192s' を rmdir できません, errno: %M)"
+ jpn "データベース削除エラー (ディレクトリ '%-.192s' を削除できません。エラー番号: %M)"
kor "데이타베이스 제거 에러(rmdir '%-.192s'를 할 수 없읍니다, 에러번호: %M)"
nor "Feil ved sletting av database (kan ikke slette katalogen '%-.192s', feil %M)"
norwegian-ny "Feil ved sletting av database (kan ikkje slette katalogen '%-.192s', feil %M)"
@@ -225,18 +219,17 @@ ER_DB_DROP_RMDIR
swe "Fel vid radering av databasen (Kan inte radera biblioteket '%-.192s'. Felkod: %M)"
ukr "Не можу видалити базу данних (Не можу видалити теку '%-.192s', помилка: %M)"
ER_CANT_DELETE_FILE
- cze "Chyba p-Bři výmazu '%-.192s' (chybový kód: %M)"
+ cze "Chyba při výmazu '%-.192s' (chybový kód: %M)"
dan "Fejl ved sletning af '%-.192s' (Fejlkode: %M)"
nla "Fout bij het verwijderen van '%-.192s' (Errcode: %M)"
eng "Error on delete of '%-.192s' (errno: %M)"
- jps "'%-.192s' の削除がエラー (errno: %M)",
est "Viga '%-.192s' kustutamisel (veakood: %M)"
fre "Erreur en effaçant '%-.192s' (Errcode: %M)"
ger "Fehler beim Löschen von '%-.192s' (Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή '%-.192s' (κωδικός λάθους: %M)"
hun "Torlesi hiba: '%-.192s' (hibakod: %M)"
ita "Errore durante la cancellazione di '%-.192s' (errno: %M)"
- jpn "'%-.192s' の削除がエラー (errno: %M)"
+ jpn "ファイル '%-.192s' の削除エラー (エラー番号: %M)"
kor "'%-.192s' 삭제 중 에러 (에러번호: %M)"
nor "Feil ved sletting av '%-.192s' (Feilkode: %M)"
norwegian-ny "Feil ved sletting av '%-.192s' (Feilkode: %M)"
@@ -250,18 +243,17 @@ ER_CANT_DELETE_FILE
swe "Kan inte radera filen '%-.192s' (Felkod: %M)"
ukr "Не можу видалити '%-.192s' (помилка: %M)"
ER_CANT_FIND_SYSTEM_REC
- cze "Nemohu -Bčíst záznam v systémové tabulce"
+ cze "Nemohu číst záznam v systémové tabulce"
dan "Kan ikke læse posten i systemfolderen"
nla "Kan record niet lezen in de systeem tabel"
eng "Can't read record in system table"
- jps "system table のレコードを読む事ができませんでした",
est "Ei suuda lugeda kirjet süsteemsest tabelist"
fre "Ne peut lire un enregistrement de la table 'system'"
ger "Datensatz in der Systemtabelle nicht lesbar"
greek "Αδύνατη η ανάγνωση εγγραφής από πίνακα του συστήματος"
hun "Nem olvashato rekord a rendszertablaban"
ita "Impossibile leggere il record dalla tabella di sistema"
- jpn "system table のレコードを読む事ができませんでした"
+ jpn "システム表のレコードを読み込めません。"
kor "system 테이블에서 레코드를 읽을 수 없습니다."
nor "Kan ikke lese posten i systemkatalogen"
norwegian-ny "Kan ikkje lese posten i systemkatalogen"
@@ -275,18 +267,17 @@ ER_CANT_FIND_SYSTEM_REC
swe "Hittar inte posten i systemregistret"
ukr "Не можу зчитати запис з системної таблиці"
ER_CANT_GET_STAT
- cze "Nemohu z-Bískat stav '%-.200s' (chybový kód: %M)"
+ cze "Nemohu získat stav '%-.200s' (chybový kód: %M)"
dan "Kan ikke læse status af '%-.200s' (Fejlkode: %M)"
nla "Kan de status niet krijgen van '%-.200s' (Errcode: %M)"
eng "Can't get status of '%-.200s' (errno: %M)"
- jps "'%-.200s' のステイタスが得られません. (errno: %M)",
est "Ei suuda lugeda '%-.200s' olekut (veakood: %M)"
fre "Ne peut obtenir le status de '%-.200s' (Errcode: %M)"
ger "Kann Status von '%-.200s' nicht ermitteln (Fehler: %M)"
greek "Αδύνατη η λήψη πληροφοριών για την κατάσταση του '%-.200s' (κωδικός λάθους: %M)"
hun "A(z) '%-.200s' statusza nem allapithato meg (hibakod: %M)"
ita "Impossibile leggere lo stato di '%-.200s' (errno: %M)"
- jpn "'%-.200s' のステイタスが得られません. (errno: %M)"
+ jpn "'%-.200s' の状態を取得できません。(エラー番号: %M)"
kor "'%-.200s'의 상태를 얻지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese statusen til '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje lese statusen til '%-.200s' (Feilkode: %M)"
@@ -300,18 +291,17 @@ ER_CANT_GET_STAT
swe "Kan inte läsa filinformationen (stat) från '%-.200s' (Felkod: %M)"
ukr "Не можу отримати статус '%-.200s' (помилка: %M)"
ER_CANT_GET_WD
- cze "Chyba p-Bři zjišťování pracovní adresář (chybový kód: %M)"
+ cze "Chyba při zjišťování pracovní adresář (chybový kód: %M)"
dan "Kan ikke læse aktive folder (Fejlkode: %M)"
nla "Kan de werkdirectory niet krijgen (Errcode: %M)"
eng "Can't get working directory (errno: %M)"
- jps "working directory を得る事ができませんでした (errno: %M)",
est "Ei suuda identifitseerida jooksvat kataloogi (veakood: %M)"
fre "Ne peut obtenir le répertoire de travail (Errcode: %M)"
ger "Kann Arbeitsverzeichnis nicht ermitteln (Fehler: %M)"
greek "Ο φάκελλος εργασίας δεν βρέθηκε (κωδικός λάθους: %M)"
hun "A munkakonyvtar nem allapithato meg (hibakod: %M)"
ita "Impossibile leggere la directory di lavoro (errno: %M)"
- jpn "working directory を得る事ができませんでした (errno: %M)"
+ jpn "作業ディレクトリを取得できません。(エラー番号: %M)"
kor "수행 디렉토리를 찾지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese aktiv katalog(Feilkode: %M)"
norwegian-ny "Kan ikkje lese aktiv katalog(Feilkode: %M)"
@@ -325,18 +315,17 @@ ER_CANT_GET_WD
swe "Kan inte inte läsa aktivt bibliotek. (Felkod: %M)"
ukr "Не можу визначити робочу теку (помилка: %M)"
ER_CANT_LOCK
- cze "Nemohu uzamknout soubor (chybov-Bý kód: %M)"
+ cze "Nemohu uzamknout soubor (chybový kód: %M)"
dan "Kan ikke låse fil (Fejlkode: %M)"
nla "Kan de file niet blokeren (Errcode: %M)"
eng "Can't lock file (errno: %M)"
- jps "ファイルをロックできません (errno: %M)",
est "Ei suuda lukustada faili (veakood: %M)"
fre "Ne peut verrouiller le fichier (Errcode: %M)"
ger "Datei kann nicht gesperrt werden (Fehler: %M)"
greek "Το αρχείο δεν μπορεί να κλειδωθεί (κωδικός λάθους: %M)"
hun "A file nem zarolhato. (hibakod: %M)"
ita "Impossibile il locking il file (errno: %M)"
- jpn "ファイルをロックできません (errno: %M)"
+ jpn "ファイルをロックできません。(エラー番号: %M)"
kor "화일을 잠그지(lock) 못했습니다. (에러번호: %M)"
nor "Kan ikke låse fila (Feilkode: %M)"
norwegian-ny "Kan ikkje låse fila (Feilkode: %M)"
@@ -350,18 +339,17 @@ ER_CANT_LOCK
swe "Kan inte låsa filen. (Felkod: %M)"
ukr "Не можу заблокувати файл (помилка: %M)"
ER_CANT_OPEN_FILE
- cze "Nemohu otev-Břít soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu otevřít soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke åbne fil: '%-.200s' (Fejlkode: %M)"
nla "Kan de file '%-.200s' niet openen (Errcode: %M)"
eng "Can't open file: '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを開く事ができません (errno: %M)",
est "Ei suuda avada faili '%-.200s' (veakood: %M)"
fre "Ne peut ouvrir le fichier: '%-.200s' (Errcode: %M)"
ger "Kann Datei '%-.200s' nicht öffnen (Fehler: %M)"
greek "Δεν είναι δυνατό να ανοιχτεί το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' file nem nyithato meg (hibakod: %M)"
ita "Impossibile aprire il file: '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを開く事ができません (errno: %M)"
+ jpn "ファイル '%-.200s' をオープンできません。(エラー番号: %M)"
kor "화일을 열지 못했습니다.: '%-.200s' (에러번호: %M)"
nor "Kan ikke åpne fila: '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje åpne fila: '%-.200s' (Feilkode: %M)"
@@ -375,18 +363,17 @@ ER_CANT_OPEN_FILE
swe "Kan inte använda '%-.200s' (Felkod: %M)"
ukr "Не можу відкрити файл: '%-.200s' (помилка: %M)"
ER_FILE_NOT_FOUND
- cze "Nemohu naj-Bít soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu najít soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke finde fila: '%-.200s' (Fejlkode: %M)"
nla "Kan de file: '%-.200s' niet vinden (Errcode: %M)"
eng "Can't find file: '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを見付ける事ができません.(errno: %M)",
est "Ei suuda leida faili '%-.200s' (veakood: %M)"
fre "Ne peut trouver le fichier: '%-.200s' (Errcode: %M)"
ger "Kann Datei '%-.200s' nicht finden (Fehler: %M)"
greek "Δεν βρέθηκε το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
hun "A(z) '%-.200s' file nem talalhato (hibakod: %M)"
ita "Impossibile trovare il file: '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを見付ける事ができません.(errno: %M)"
+ jpn "ファイル '%-.200s' が見つかりません。(エラー番号: %M)"
kor "화일을 찾지 못했습니다.: '%-.200s' (에러번호: %M)"
nor "Kan ikke finne fila: '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje finne fila: '%-.200s' (Feilkode: %M)"
@@ -400,18 +387,17 @@ ER_FILE_NOT_FOUND
swe "Hittar inte filen '%-.200s' (Felkod: %M)"
ukr "Не можу знайти файл: '%-.200s' (помилка: %M)"
ER_CANT_READ_DIR
- cze "Nemohu -Bčíst adresář '%-.192s' (chybový kód: %M)"
+ cze "Nemohu číst adresář '%-.192s' (chybový kód: %M)"
dan "Kan ikke læse folder '%-.192s' (Fejlkode: %M)"
nla "Kan de directory niet lezen van '%-.192s' (Errcode: %M)"
eng "Can't read dir of '%-.192s' (errno: %M)"
- jps "'%-.192s' ディレクトリが読めません.(errno: %M)",
est "Ei suuda lugeda kataloogi '%-.192s' (veakood: %M)"
fre "Ne peut lire le répertoire de '%-.192s' (Errcode: %M)"
ger "Verzeichnis von '%-.192s' nicht lesbar (Fehler: %M)"
greek "Δεν είναι δυνατό να διαβαστεί ο φάκελλος του '%-.192s' (κωδικός λάθους: %M)"
hun "A(z) '%-.192s' konyvtar nem olvashato. (hibakod: %M)"
ita "Impossibile leggere la directory di '%-.192s' (errno: %M)"
- jpn "'%-.192s' ディレクトリが読めません.(errno: %M)"
+ jpn "ディレクトリ '%-.192s' を読み込めません。(エラー番号: %M)"
kor "'%-.192s'디렉토리를 읽지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese katalogen '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje lese katalogen '%-.192s' (Feilkode: %M)"
@@ -425,18 +411,17 @@ ER_CANT_READ_DIR
swe "Kan inte läsa från bibliotek '%-.192s' (Felkod: %M)"
ukr "Не можу прочитати теку '%-.192s' (помилка: %M)"
ER_CANT_SET_WD
- cze "Nemohu zm-Běnit adresář na '%-.192s' (chybový kód: %M)"
+ cze "Nemohu změnit adresář na '%-.192s' (chybový kód: %M)"
dan "Kan ikke skifte folder til '%-.192s' (Fejlkode: %M)"
nla "Kan de directory niet veranderen naar '%-.192s' (Errcode: %M)"
eng "Can't change dir to '%-.192s' (errno: %M)"
- jps "'%-.192s' ディレクトリに chdir できません.(errno: %M)",
est "Ei suuda siseneda kataloogi '%-.192s' (veakood: %M)"
fre "Ne peut changer le répertoire pour '%-.192s' (Errcode: %M)"
ger "Kann nicht in das Verzeichnis '%-.192s' wechseln (Fehler: %M)"
greek "Αδύνατη η αλλαγή του τρέχοντος καταλόγου σε '%-.192s' (κωδικός λάθους: %M)"
hun "Konyvtarvaltas nem lehetseges a(z) '%-.192s'-ba. (hibakod: %M)"
ita "Impossibile cambiare la directory in '%-.192s' (errno: %M)"
- jpn "'%-.192s' ディレクトリに chdir できません.(errno: %M)"
+ jpn "ディレクトリ '%-.192s' に移動できません。(エラー番号: %M)"
kor "'%-.192s'디렉토리로 이동할 수 없었습니다. (에러번호: %M)"
nor "Kan ikke skifte katalog til '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje skifte katalog til '%-.192s' (Feilkode: %M)"
@@ -450,7 +435,7 @@ ER_CANT_SET_WD
swe "Kan inte byta till '%-.192s' (Felkod: %M)"
ukr "Не можу перейти у теку '%-.192s' (помилка: %M)"
ER_CHECKREAD
- cze "Z-Báznam byl změněn od posledního čtení v tabulce '%-.192s'"
+ cze "Záznam byl změněn od posledního čtení v tabulce '%-.192s'"
dan "Posten er ændret siden sidste læsning '%-.192s'"
nla "Record is veranderd sinds de laatste lees activiteit in de tabel '%-.192s'"
eng "Record has changed since last read in table '%-.192s'"
@@ -460,6 +445,7 @@ ER_CHECKREAD
greek "Η εγγραφή έχει αλλάξει από την τελευταία φορά που ανασύρθηκε από τον πίνακα '%-.192s'"
hun "A(z) '%-.192s' tablaban talalhato rekord megvaltozott az utolso olvasas ota"
ita "Il record e` cambiato dall'ultima lettura della tabella '%-.192s'"
+ jpn "表 '%-.192s' の最後の読み込み時点から、レコードが変化しました。"
kor "테이블 '%-.192s'에서 마지막으로 읽은 후 Record가 변경되었습니다."
nor "Posten har blitt endret siden den ble lest '%-.192s'"
norwegian-ny "Posten har vorte endra sidan den sist vart lesen '%-.192s'"
@@ -472,44 +458,42 @@ ER_CHECKREAD
spa "El registro ha cambiado desde la ultima lectura de la tabla '%-.192s'"
swe "Posten har förändrats sedan den lästes i register '%-.192s'"
ukr "Запис було змінено з часу останнього читання з таблиці '%-.192s'"
-ER_DISK_FULL
- cze "Disk je pln-Bý (%s), čekám na uvolnění nějakého místa ..."
- dan "Ikke mere diskplads (%s). Venter på at få frigjort plads..."
- nla "Schijf vol (%s). Aan het wachten totdat er ruimte vrij wordt gemaakt..."
- eng "Disk full (%s); waiting for someone to free some space..."
- jps "Disk full (%s). 誰かが何かを減らすまでまってください...",
- est "Ketas täis (%s). Ootame kuni tekib vaba ruumi..."
- fre "Disque plein (%s). J'attend que quelqu'un libère de l'espace..."
- ger "Festplatte voll (%s). Warte, bis jemand Platz schafft ..."
- greek "Δεν υπάρχει χώρος στο δίσκο (%s). Παρακαλώ, περιμένετε να ελευθερωθεί χώρος..."
- hun "A lemez megtelt (%s)."
- ita "Disco pieno (%s). In attesa che qualcuno liberi un po' di spazio..."
- jpn "Disk full (%s). 誰かが何かを減らすまでまってください..."
- kor "Disk full (%s). 다른 사람이 지울때까지 기다립니다..."
- nor "Ikke mer diskplass (%s). Venter på å få frigjort plass..."
- norwegian-ny "Ikkje meir diskplass (%s). Ventar på å få frigjort plass..."
- pol "Dysk pełny (%s). Oczekiwanie na zwolnienie miejsca..."
- por "Disco cheio (%s). Aguardando alguém liberar algum espaço..."
- rum "Hard-disk-ul este plin (%s). Astept sa se elibereze ceva spatiu..."
- rus "Диск заполнен. (%s). Ожидаем, пока кто-то не уберет после себя мусор..."
- serbian "Disk je pun (%s). Čekam nekoga da dođe i oslobodi nešto mesta..."
- slo "Disk je plný (%s), čakám na uvoľnenie miesta..."
- spa "Disco lleno (%s). Esperando para que se libere algo de espacio..."
- swe "Disken är full (%s). Väntar tills det finns ledigt utrymme..."
- ukr "Диск заповнений (%s). Вичикую, доки звільниться трохи місця..."
+ER_DISK_FULL
+ cze "Disk je plný (%s), čekám na uvolnění nějakého místa ... (chybový kód: %M)"
+ dan "Ikke mere diskplads (%s). Venter på at få frigjort plads... (Fejlkode: %M)"
+ nla "Schijf vol (%s). Aan het wachten totdat er ruimte vrij wordt gemaakt... (Errcode: %M)"
+ eng "Disk full (%s); waiting for someone to free some space... (errno: %M)"
+ est "Ketas täis (%s). Ootame kuni tekib vaba ruumi... (veakood: %M)"
+ fre "Disque plein (%s). J'attend que quelqu'un libère de l'espace... (Errcode: %M)"
+ ger "Festplatte voll (%s). Warte, bis jemand Platz schafft ... (Fehler: %M)"
+ greek "Δεν υπάρχει χώρος στο δίσκο (%s). Παρακαλώ, περιμένετε να ελευθερωθεί χώρος... (κωδικός λάθους: %M)"
+ hun "A lemez megtelt (%s). (hibakod: %M)"
+ ita "Disco pieno (%s). In attesa che qualcuno liberi un po' di spazio... (errno: %M)"
+ jpn "ディスク領域不足です(%s)。(エラー番号: %M)"
+ kor "Disk full (%s). 다른 사람이 지울때까지 기다립니다... (에러번호: %M)"
+ nor "Ikke mer diskplass (%s). Venter på å få frigjort plass... (Feilkode: %M)"
+ norwegian-ny "Ikkje meir diskplass (%s). Ventar på å få frigjort plass... (Feilkode: %M)"
+ pol "Dysk pełny (%s). Oczekiwanie na zwolnienie miejsca... (Kod błędu: %M)"
+ por "Disco cheio (%s). Aguardando alguém liberar algum espaço... (erro no. %M)"
+ rum "Hard-disk-ul este plin (%s). Astept sa se elibereze ceva spatiu... (Eroare: %M)"
+ rus "Диск заполнен. (%s). Ожидаем, пока кто-то не уберет после себя мусор... (ошибка: %M)"
+ serbian "Disk je pun (%s). Čekam nekoga da dođe i oslobodi nešto mesta... (errno: %M)"
+ slo "Disk je plný (%s), čakám na uvoľnenie miesta... (chybový kód: %M)"
+ spa "Disco lleno (%s). Esperando para que se libere algo de espacio... (Error: %M)"
+ swe "Disken är full (%s). Väntar tills det finns ledigt utrymme... (Felkod: %M)"
+ ukr "Диск заповнений (%s). Вичикую, доки звільниться трохи місця... (помилка: %M)"
ER_DUP_KEY 23000
- cze "Nemohu zapsat, zdvojen-Bý klíč v tabulce '%-.192s'"
+ cze "Nemohu zapsat, zdvojený klíč v tabulce '%-.192s'"
dan "Kan ikke skrive, flere ens nøgler i tabellen '%-.192s'"
nla "Kan niet schrijven, dubbele zoeksleutel in tabel '%-.192s'"
eng "Can't write; duplicate key in table '%-.192s'"
- jps "table '%-.192s' に key が重複していて書きこめません",
est "Ei saa kirjutada, korduv võti tabelis '%-.192s'"
fre "Ecriture impossible, doublon dans une clé de la table '%-.192s'"
ger "Kann nicht speichern, Grund: doppelter Schlüssel in Tabelle '%-.192s'"
greek "Δεν είναι δυνατή η καταχώρηση, η τιμή υπάρχει ήδη στον πίνακα '%-.192s'"
hun "Irasi hiba, duplikalt kulcs a '%-.192s' tablaban."
ita "Scrittura impossibile: chiave duplicata nella tabella '%-.192s'"
- jpn "table '%-.192s' に key が重複していて書きこめません"
+ jpn "書き込めません。表 '%-.192s' に重複するキーがあります。"
kor "기록할 수 없읍니다., 테이블 '%-.192s'에서 중복 키"
nor "Kan ikke skrive, flere like nøkler i tabellen '%-.192s'"
norwegian-ny "Kan ikkje skrive, flere like nyklar i tabellen '%-.192s'"
@@ -523,7 +507,7 @@ ER_DUP_KEY 23000
swe "Kan inte skriva, dubbel söknyckel i register '%-.192s'"
ukr "Не можу записати, дублюючийся ключ в таблиці '%-.192s'"
ER_ERROR_ON_CLOSE
- cze "Chyba p-Bři zavírání '%-.192s' (chybový kód: %M)"
+ cze "Chyba při zavírání '%-.192s' (chybový kód: %M)"
dan "Fejl ved lukning af '%-.192s' (Fejlkode: %M)"
nla "Fout bij het sluiten van '%-.192s' (Errcode: %M)"
eng "Error on close of '%-.192s' (errno: %M)"
@@ -533,6 +517,7 @@ ER_ERROR_ON_CLOSE
greek "Παρουσιάστηκε πρόβλημα κλείνοντας το '%-.192s' (κωδικός λάθους: %M)"
hun "Hiba a(z) '%-.192s' zarasakor. (hibakod: %M)"
ita "Errore durante la chiusura di '%-.192s' (errno: %M)"
+ jpn "'%-.192s' のクローズ時エラー (エラー番号: %M)"
kor "'%-.192s'닫는 중 에러 (에러번호: %M)"
nor "Feil ved lukking av '%-.192s' (Feilkode: %M)"
norwegian-ny "Feil ved lukking av '%-.192s' (Feilkode: %M)"
@@ -546,18 +531,17 @@ ER_ERROR_ON_CLOSE
swe "Fick fel vid stängning av '%-.192s' (Felkod: %M)"
ukr "Не можу закрити '%-.192s' (помилка: %M)"
ER_ERROR_ON_READ
- cze "Chyba p-Bři čtení souboru '%-.200s' (chybový kód: %M)"
+ cze "Chyba při čtení souboru '%-.200s' (chybový kód: %M)"
dan "Fejl ved læsning af '%-.200s' (Fejlkode: %M)"
nla "Fout bij het lezen van file '%-.200s' (Errcode: %M)"
eng "Error reading file '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルの読み込みエラー (errno: %M)",
est "Viga faili '%-.200s' lugemisel (veakood: %M)"
fre "Erreur en lecture du fichier '%-.200s' (Errcode: %M)"
ger "Fehler beim Lesen der Datei '%-.200s' (Fehler: %M)"
greek "Πρόβλημα κατά την ανάγνωση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.200s'file olvasasakor. (hibakod: %M)"
ita "Errore durante la lettura del file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルの読み込みエラー (errno: %M)"
+ jpn "ファイル '%-.200s' の読み込みエラー (エラー番号: %M)"
kor "'%-.200s'화일 읽기 에러 (에러번호: %M)"
nor "Feil ved lesing av '%-.200s' (Feilkode: %M)"
norwegian-ny "Feil ved lesing av '%-.200s' (Feilkode: %M)"
@@ -571,18 +555,17 @@ ER_ERROR_ON_READ
swe "Fick fel vid läsning av '%-.200s' (Felkod %M)"
ukr "Не можу прочитати файл '%-.200s' (помилка: %M)"
ER_ERROR_ON_RENAME
- cze "Chyba p-Bři přejmenování '%-.210s' na '%-.210s' (chybový kód: %M)"
+ cze "Chyba při přejmenování '%-.210s' na '%-.210s' (chybový kód: %M)"
dan "Fejl ved omdøbning af '%-.210s' til '%-.210s' (Fejlkode: %M)"
nla "Fout bij het hernoemen van '%-.210s' naar '%-.210s' (Errcode: %M)"
eng "Error on rename of '%-.210s' to '%-.210s' (errno: %M)"
- jps "'%-.210s' を '%-.210s' に rename できません (errno: %M)",
est "Viga faili '%-.210s' ümbernimetamisel '%-.210s'-ks (veakood: %M)"
fre "Erreur en renommant '%-.210s' en '%-.210s' (Errcode: %M)"
ger "Fehler beim Umbenennen von '%-.210s' in '%-.210s' (Fehler: %M)"
greek "Πρόβλημα κατά την μετονομασία του αρχείου '%-.210s' to '%-.210s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.210s' file atnevezesekor '%-.210s'. (hibakod: %M)"
ita "Errore durante la rinominazione da '%-.210s' a '%-.210s' (errno: %M)"
- jpn "'%-.210s' を '%-.210s' に rename できません (errno: %M)"
+ jpn "'%-.210s' の名前を '%-.210s' に変更できません (エラー番号: %M)"
kor "'%-.210s'를 '%-.210s'로 이름 변경중 에러 (에러번호: %M)"
nor "Feil ved omdøping av '%-.210s' til '%-.210s' (Feilkode: %M)"
norwegian-ny "Feil ved omdøyping av '%-.210s' til '%-.210s' (Feilkode: %M)"
@@ -596,18 +579,17 @@ ER_ERROR_ON_RENAME
swe "Kan inte byta namn från '%-.210s' till '%-.210s' (Felkod: %M)"
ukr "Не можу перейменувати '%-.210s' у '%-.210s' (помилка: %M)"
ER_ERROR_ON_WRITE
- cze "Chyba p-Bři zápisu do souboru '%-.200s' (chybový kód: %M)"
+ cze "Chyba při zápisu do souboru '%-.200s' (chybový kód: %M)"
dan "Fejl ved skriving av filen '%-.200s' (Fejlkode: %M)"
nla "Fout bij het wegschrijven van file '%-.200s' (Errcode: %M)"
eng "Error writing file '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを書く事ができません (errno: %M)",
est "Viga faili '%-.200s' kirjutamisel (veakood: %M)"
fre "Erreur d'écriture du fichier '%-.200s' (Errcode: %M)"
ger "Fehler beim Speichern der Datei '%-.200s' (Fehler: %M)"
greek "Πρόβλημα κατά την αποθήκευση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.200s' file irasakor. (hibakod: %M)"
ita "Errore durante la scrittura del file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを書く事ができません (errno: %M)"
+ jpn "ファイル '%-.200s' の書き込みエラー (エラー番号: %M)"
kor "'%-.200s'화일 기록 중 에러 (에러번호: %M)"
nor "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
norwegian-ny "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
@@ -621,18 +603,17 @@ ER_ERROR_ON_WRITE
swe "Fick fel vid skrivning till '%-.200s' (Felkod %M)"
ukr "Не можу записати файл '%-.200s' (помилка: %M)"
ER_FILE_USED
- cze "'%-.192s' je zam-Bčen proti změnám"
+ cze "'%-.192s' je zamčen proti změnám"
dan "'%-.192s' er låst mod opdateringer"
nla "'%-.192s' is geblokeerd tegen veranderingen"
eng "'%-.192s' is locked against change"
- jps "'%-.192s' はロックされています",
est "'%-.192s' on lukustatud muudatuste vastu"
fre "'%-.192s' est verrouillé contre les modifications"
ger "'%-.192s' ist für Änderungen gesperrt"
greek "'%-.192s' δεν επιτρέπονται αλλαγές"
hun "'%-.192s' a valtoztatas ellen zarolva"
ita "'%-.192s' e` soggetto a lock contro i cambiamenti"
- jpn "'%-.192s' はロックされています"
+ jpn "'%-.192s' はロックされています。"
kor "'%-.192s'가 변경할 수 없도록 잠겨있읍니다."
nor "'%-.192s' er låst mot oppdateringer"
norwegian-ny "'%-.192s' er låst mot oppdateringar"
@@ -646,18 +627,17 @@ ER_FILE_USED
swe "'%-.192s' är låst mot användning"
ukr "'%-.192s' заблокований на внесення змін"
ER_FILSORT_ABORT
- cze "T-Břídění přerušeno"
+ cze "Třídění přerušeno"
dan "Sortering afbrudt"
nla "Sorteren afgebroken"
eng "Sort aborted"
- jps "Sort 中断",
est "Sorteerimine katkestatud"
fre "Tri alphabétique abandonné"
ger "Sortiervorgang abgebrochen"
greek "Η διαδικασία ταξινόμισης ακυρώθηκε"
hun "Sikertelen rendezes"
ita "Operazione di ordinamento abbandonata"
- jpn "Sort 中断"
+ jpn "ソート処理を中断しました。"
kor "소트가 중단되었습니다."
nor "Sortering avbrutt"
norwegian-ny "Sortering avbrote"
@@ -675,14 +655,13 @@ ER_FORM_NOT_FOUND
dan "View '%-.192s' eksisterer ikke for '%-.192s'"
nla "View '%-.192s' bestaat niet voor '%-.192s'"
eng "View '%-.192s' doesn't exist for '%-.192s'"
- jps "View '%-.192s' が '%-.192s' に定義されていません",
est "Vaade '%-.192s' ei eksisteeri '%-.192s' jaoks"
fre "La vue (View) '%-.192s' n'existe pas pour '%-.192s'"
ger "View '%-.192s' existiert für '%-.192s' nicht"
greek "Το View '%-.192s' δεν υπάρχει για '%-.192s'"
hun "A(z) '%-.192s' nezet nem letezik a(z) '%-.192s'-hoz"
ita "La view '%-.192s' non esiste per '%-.192s'"
- jpn "View '%-.192s' が '%-.192s' に定義されていません"
+ jpn "ビュー '%-.192s' は '%-.192s' に存在しません。"
kor "뷰 '%-.192s'가 '%-.192s'에서는 존재하지 않읍니다."
nor "View '%-.192s' eksisterer ikke for '%-.192s'"
norwegian-ny "View '%-.192s' eksisterar ikkje for '%-.192s'"
@@ -696,7 +675,7 @@ ER_FORM_NOT_FOUND
swe "Formulär '%-.192s' finns inte i '%-.192s'"
ukr "Вигляд '%-.192s' не існує для '%-.192s'"
ER_GET_ERRNO
- cze "Obsluha tabulky vr-Bátila chybu %M"
+ cze "Obsluha tabulky vrátila chybu %M"
dan "Modtog fejl %M fra tabel håndteringen"
nla "Fout %M van tabel handler"
eng "Got error %M from storage engine"
@@ -706,7 +685,7 @@ ER_GET_ERRNO
greek "Ελήφθη μήνυμα λάθους %M από τον χειριστή πίνακα (table handler)"
hun "%M hibajelzes a tablakezelotol"
ita "Rilevato l'errore %M dal gestore delle tabelle"
- jpn "Got error %M from table handler"
+ jpn "ストレージエンジンがエラー %M を返しました。"
kor "테이블 handler에서 %M 에러가 발생 하였습니다."
nor "Mottok feil %M fra tabell håndterer"
norwegian-ny "Mottok feil %M fra tabell handterar"
@@ -720,7 +699,7 @@ ER_GET_ERRNO
swe "Fick felkod %M från databashanteraren"
ukr "Отримано помилку %M від дескриптора таблиці"
ER_ILLEGAL_HA
- cze "Obsluha tabulky '%-.192s' nem-Bá tento parametr"
+ cze "Obsluha tabulky '%-.192s' nemá tento parametr"
dan "Denne mulighed eksisterer ikke for tabeltypen '%-.192s'"
nla "Tabel handler voor '%-.192s' heeft deze optie niet"
eng "Table storage engine for '%-.192s' doesn't have this option"
@@ -730,7 +709,7 @@ ER_ILLEGAL_HA
greek "Ο χειριστής πίνακα (table handler) για '%-.192s' δεν διαθέτει αυτή την επιλογή"
hun "A(z) '%-.192s' tablakezelonek nincs ilyen opcioja"
ita "Il gestore delle tabelle per '%-.192s' non ha questa opzione"
- jpn "Table handler for '%-.192s' doesn't have this option"
+ jpn "表 '%-.192s' のストレージエンジンでは提供されないオプションです。"
kor "'%-.192s'의 테이블 handler는 이러한 옵션을 제공하지 않읍니다."
nor "Tabell håndtereren for '%-.192s' har ikke denne muligheten"
norwegian-ny "Tabell håndteraren for '%-.192s' har ikkje denne moglegheita"
@@ -744,18 +723,17 @@ ER_ILLEGAL_HA
swe "Tabellhanteraren for tabell '%-.192s' stödjer ej detta"
ukr "Дескриптор таблиці '%-.192s' не має цієї властивості"
ER_KEY_NOT_FOUND
- cze "Nemohu naj-Bít záznam v '%-.192s'"
+ cze "Nemohu najít záznam v '%-.192s'"
dan "Kan ikke finde posten i '%-.192s'"
nla "Kan record niet vinden in '%-.192s'"
eng "Can't find record in '%-.192s'"
- jps "'%-.192s'のなかにレコードが見付かりません",
est "Ei suuda leida kirjet '%-.192s'-s"
fre "Ne peut trouver l'enregistrement dans '%-.192s'"
ger "Kann Datensatz in '%-.192s' nicht finden"
greek "Αδύνατη η ανεύρεση εγγραφής στο '%-.192s'"
hun "Nem talalhato a rekord '%-.192s'-ben"
ita "Impossibile trovare il record in '%-.192s'"
- jpn "'%-.192s'のなかにレコードが見付かりません"
+ jpn "'%-.192s' にレコードが見つかりません。"
kor "'%-.192s'에서 레코드를 찾을 수 없읍니다."
nor "Kan ikke finne posten i '%-.192s'"
norwegian-ny "Kan ikkje finne posten i '%-.192s'"
@@ -769,18 +747,17 @@ ER_KEY_NOT_FOUND
swe "Hittar inte posten '%-.192s'"
ukr "Не можу записати у '%-.192s'"
ER_NOT_FORM_FILE
- cze "Nespr-Bávná informace v souboru '%-.200s'"
+ cze "Nesprávná informace v souboru '%-.200s'"
dan "Forkert indhold i: '%-.200s'"
nla "Verkeerde info in file: '%-.200s'"
eng "Incorrect information in file: '%-.200s'"
- jps "ファイル '%-.200s' の info が間違っているようです",
est "Vigane informatsioon failis '%-.200s'"
fre "Information erronnée dans le fichier: '%-.200s'"
ger "Falsche Information in Datei '%-.200s'"
greek "Λάθος πληροφορίες στο αρχείο: '%-.200s'"
hun "Ervenytelen info a file-ban: '%-.200s'"
ita "Informazione errata nel file: '%-.200s'"
- jpn "ファイル '%-.200s' の info が間違っているようです"
+ jpn "ファイル '%-.200s' 内の情報が不正です。"
kor "화일의 부정확한 정보: '%-.200s'"
nor "Feil informasjon i filen: '%-.200s'"
norwegian-ny "Feil informasjon i fila: '%-.200s'"
@@ -794,18 +771,17 @@ ER_NOT_FORM_FILE
swe "Felaktig fil: '%-.200s'"
ukr "Хибна інформація у файлі: '%-.200s'"
ER_NOT_KEYFILE
- cze "Nespr-Bávný klíč pro tabulku '%-.200s'; pokuste se ho opravit"
+ cze "Nesprávný klíč pro tabulku '%-.200s'; pokuste se ho opravit"
dan "Fejl i indeksfilen til tabellen '%-.200s'; prøv at reparere den"
nla "Verkeerde zoeksleutel file voor tabel: '%-.200s'; probeer het te repareren"
eng "Incorrect key file for table '%-.200s'; try to repair it"
- jps "'%-.200s' テーブルの key file が間違っているようです. 修復をしてください",
est "Tabeli '%-.200s' võtmefail on vigane; proovi seda parandada"
fre "Index corrompu dans la table: '%-.200s'; essayez de le réparer"
ger "Fehlerhafte Index-Datei für Tabelle '%-.200s'; versuche zu reparieren"
greek "Λάθος αρχείο ταξινόμισης (key file) για τον πίνακα: '%-.200s'; Παρακαλώ, διορθώστε το!"
hun "Ervenytelen kulcsfile a tablahoz: '%-.200s'; probalja kijavitani!"
ita "File chiave errato per la tabella : '%-.200s'; prova a riparalo"
- jpn "'%-.200s' テーブルの key file が間違っているようです. 修復をしてください"
+ jpn "表 '%-.200s' の索引ファイル(key file)の内容が不正です。修復を試行してください。"
kor "'%-.200s' 테이블의 부정확한 키 존재. 수정하시오!"
nor "Tabellen '%-.200s' har feil i nøkkelfilen; forsøk å reparer den"
norwegian-ny "Tabellen '%-.200s' har feil i nykkelfila; prøv å reparere den"
@@ -819,18 +795,17 @@ ER_NOT_KEYFILE
swe "Fatalt fel vid hantering av register '%-.200s'; kör en reparation"
ukr "Хибний файл ключей для таблиці: '%-.200s'; Спробуйте його відновити"
ER_OLD_KEYFILE
- cze "Star-Bý klíčový soubor pro '%-.192s'; opravte ho."
+ cze "Starý klíčový soubor pro '%-.192s'; opravte ho."
dan "Gammel indeksfil for tabellen '%-.192s'; reparer den"
nla "Oude zoeksleutel file voor tabel '%-.192s'; repareer het!"
eng "Old key file for table '%-.192s'; repair it!"
- jps "'%-.192s' テーブルは古い形式の key file のようです; 修復をしてください",
est "Tabeli '%-.192s' võtmefail on aegunud; paranda see!"
fre "Vieux fichier d'index pour la table '%-.192s'; réparez le!"
ger "Alte Index-Datei für Tabelle '%-.192s'. Bitte reparieren"
greek "Παλαιό αρχείο ταξινόμισης (key file) για τον πίνακα '%-.192s'; Παρακαλώ, διορθώστε το!"
hun "Regi kulcsfile a '%-.192s'tablahoz; probalja kijavitani!"
ita "File chiave vecchio per la tabella '%-.192s'; riparalo!"
- jpn "'%-.192s' テーブルは古い形式の key file のようです; 修復をしてください"
+ jpn "表 '%-.192s' の索引ファイル(key file)は古い形式です。修復してください。"
kor "'%-.192s' 테이블의 이전버젼의 키 존재. 수정하시오!"
nor "Gammel nøkkelfil for tabellen '%-.192s'; reparer den!"
norwegian-ny "Gammel nykkelfil for tabellen '%-.192s'; reparer den!"
@@ -844,18 +819,17 @@ ER_OLD_KEYFILE
swe "Gammal nyckelfil '%-.192s'; reparera registret"
ukr "Старий файл ключей для таблиці '%-.192s'; Відновіть його!"
ER_OPEN_AS_READONLY
- cze "'%-.192s' je jen pro -Bčtení"
+ cze "'%-.192s' je jen pro čtení"
dan "'%-.192s' er skrivebeskyttet"
nla "'%-.192s' is alleen leesbaar"
eng "Table '%-.192s' is read only"
- jps "'%-.192s' は読み込み専用です",
est "Tabel '%-.192s' on ainult lugemiseks"
fre "'%-.192s' est en lecture seulement"
ger "Tabelle '%-.192s' ist nur lesbar"
greek "'%-.192s' επιτρέπεται μόνο η ανάγνωση"
hun "'%-.192s' irasvedett"
ita "'%-.192s' e` di sola lettura"
- jpn "'%-.192s' は読み込み専用です"
+ jpn "表 '%-.192s' は読み込み専用です。"
kor "테이블 '%-.192s'는 읽기전용 입니다."
nor "'%-.192s' er skrivebeskyttet"
norwegian-ny "'%-.192s' er skrivetryggja"
@@ -869,18 +843,17 @@ ER_OPEN_AS_READONLY
swe "'%-.192s' är skyddad mot förändring"
ukr "Таблиця '%-.192s' тільки для читання"
ER_OUTOFMEMORY HY001 S1001
- cze "M-Bálo paměti. Přestartujte daemona a zkuste znovu (je potřeba %d bytů)"
+ cze "Málo paměti. Přestartujte daemona a zkuste znovu (je potřeba %d bytů)"
dan "Ikke mere hukommelse. Genstart serveren og prøv igen (mangler %d bytes)"
nla "Geen geheugen meer. Herstart server en probeer opnieuw (%d bytes nodig)"
eng "Out of memory; restart server and try again (needed %d bytes)"
- jps "Out of memory. デーモンをリスタートしてみてください (%d bytes 必要)",
est "Mälu sai otsa. Proovi MariaDB uuesti käivitada (puudu jäi %d baiti)"
fre "Manque de mémoire. Redémarrez le démon et ré-essayez (%d octets nécessaires)"
ger "Kein Speicher vorhanden (%d Bytes benötigt). Bitte Server neu starten"
greek "Δεν υπάρχει διαθέσιμη μνήμη. Προσπαθήστε πάλι, επανεκινώντας τη διαδικασία (demon) (χρειάζονται %d bytes)"
hun "Nincs eleg memoria. Inditsa ujra a demont, es probalja ismet. (%d byte szukseges.)"
ita "Memoria esaurita. Fai ripartire il demone e riprova (richiesti %d bytes)"
- jpn "Out of memory. デーモンをリスタートしてみてください (%d bytes 必要)"
+ jpn "メモリが不足しています。サーバーを再起動してみてください。(%d バイトの割り当てに失敗)"
kor "Out of memory. 데몬을 재 실행 후 다시 시작하시오 (needed %d bytes)"
nor "Ikke mer minne. Star på nytt tjenesten og prøv igjen (trengte %d byter)"
norwegian-ny "Ikkje meir minne. Start på nytt tenesten og prøv igjen (trengte %d bytar)"
@@ -894,18 +867,17 @@ ER_OUTOFMEMORY HY001 S1001
swe "Oväntat slut på minnet, starta om programmet och försök på nytt (Behövde %d bytes)"
ukr "Брак пам'яті. Рестартуйте сервер та спробуйте знову (потрібно %d байтів)"
ER_OUT_OF_SORTMEMORY HY001 S1001
- cze "M-Bálo paměti pro třídění. Zvyšte velikost třídícího bufferu"
+ cze "Málo paměti pro třídění. Zvyšte velikost třídícího bufferu"
dan "Ikke mere sorteringshukommelse. Øg sorteringshukommelse (sort buffer size) for serveren"
nla "Geen geheugen om te sorteren. Verhoog de server sort buffer size"
eng "Out of sort memory, consider increasing server sort buffer size"
- jps "Out of sort memory. sort buffer size が足りないようです.",
est "Mälu sai sorteerimisel otsa. Suurenda MariaDB-i sorteerimispuhvrit"
fre "Manque de mémoire pour le tri. Augmentez-la."
ger "Kein Speicher zum Sortieren vorhanden. sort_buffer_size sollte im Server erhöht werden"
greek "Δεν υπάρχει διαθέσιμη μνήμη για ταξινόμιση. Αυξήστε το sort buffer size για τη διαδικασία (demon)"
hun "Nincs eleg memoria a rendezeshez. Novelje a rendezo demon puffermeretet"
ita "Memoria per gli ordinamenti esaurita. Incrementare il 'sort_buffer' al demone"
- jpn "Out of sort memory. sort buffer size が足りないようです."
+ jpn "ソートメモリが不足しています。ソートバッファサイズ(sort buffer size)の増加を検討してください。"
kor "Out of sort memory. daemon sort buffer의 크기를 증가시키세요"
nor "Ikke mer sorteringsminne. Vurder å øke sorteringsminnet (sort buffer size) for tjenesten"
norwegian-ny "Ikkje meir sorteringsminne. Vurder å auke sorteringsminnet (sorteringsbuffer storleik) for tenesten"
@@ -919,18 +891,17 @@ ER_OUT_OF_SORTMEMORY HY001 S1001
swe "Sorteringsbufferten räcker inte till. Kontrollera startparametrarna"
ukr "Брак пам'яті для сортування. Треба збільшити розмір буфера сортування у сервера"
ER_UNEXPECTED_EOF
- cze "Neo-Bčekávaný konec souboru při čtení '%-.192s' (chybový kód: %M)"
+ cze "Neočekávaný konec souboru při čtení '%-.192s' (chybový kód: %M)"
dan "Uventet afslutning på fil (eof) ved læsning af filen '%-.192s' (Fejlkode: %M)"
nla "Onverwachte eof gevonden tijdens het lezen van file '%-.192s' (Errcode: %M)"
eng "Unexpected EOF found when reading file '%-.192s' (errno: %M)"
- jps "'%-.192s' ファイルを読み込み中に EOF が予期せぬ所で現れました. (errno: %M)",
est "Ootamatu faililõpumärgend faili '%-.192s' lugemisel (veakood: %M)"
fre "Fin de fichier inattendue en lisant '%-.192s' (Errcode: %M)"
ger "Unerwartetes Ende beim Lesen der Datei '%-.192s' (Fehler: %M)"
greek "Κατά τη διάρκεια της ανάγνωσης, βρέθηκε απροσδόκητα το τέλος του αρχείου '%-.192s' (κωδικός λάθους: %M)"
hun "Varatlan filevege-jel a '%-.192s'olvasasakor. (hibakod: %M)"
ita "Fine del file inaspettata durante la lettura del file '%-.192s' (errno: %M)"
- jpn "'%-.192s' ファイルを読み込み中に EOF が予期せぬ所で現れました. (errno: %M)"
+ jpn "ファイル '%-.192s' を読み込み中に予期せずファイルの終端に達しました。(エラー番号: %M)"
kor "'%-.192s' 화일을 읽는 도중 잘못된 eof을 발견 (에러번호: %M)"
nor "Uventet slutt på fil (eof) ved lesing av filen '%-.192s' (Feilkode: %M)"
norwegian-ny "Uventa slutt på fil (eof) ved lesing av fila '%-.192s' (Feilkode: %M)"
@@ -944,18 +915,17 @@ ER_UNEXPECTED_EOF
swe "Oväntat filslut vid läsning från '%-.192s' (Felkod: %M)"
ukr "Хибний кінець файлу '%-.192s' (помилка: %M)"
ER_CON_COUNT_ERROR 08004
- cze "P-Bříliš mnoho spojení"
+ cze "Příliš mnoho spojení"
dan "For mange forbindelser (connections)"
nla "Te veel verbindingen"
eng "Too many connections"
- jps "接続が多すぎます",
est "Liiga palju samaaegseid ühendusi"
fre "Trop de connexions"
ger "Zu viele Verbindungen"
greek "Υπάρχουν πολλές συνδέσεις..."
hun "Tul sok kapcsolat"
ita "Troppe connessioni"
- jpn "接続が多すぎます"
+ jpn "接続が多すぎます。"
kor "너무 많은 연결... max_connection을 증가 시키시오..."
nor "For mange tilkoblinger (connections)"
norwegian-ny "For mange tilkoplingar (connections)"
@@ -969,18 +939,17 @@ ER_CON_COUNT_ERROR 08004
swe "För många anslutningar"
ukr "Забагато з'єднань"
ER_OUT_OF_RESOURCES
- cze "M-Bálo prostoru/paměti pro thread"
+ cze "Málo prostoru/paměti pro thread"
dan "Udgået for tråde/hukommelse"
nla "Geen thread geheugen meer; controleer of mysqld of andere processen al het beschikbare geheugen gebruikt. Zo niet, dan moet u wellicht 'ulimit' gebruiken om mysqld toe te laten meer geheugen te benutten, of u kunt extra swap ruimte toevoegen"
eng "Out of memory; check if mysqld or some other process uses all available memory; if not, you may have to use 'ulimit' to allow mysqld to use more memory or you can add more swap space"
- jps "Out of memory; mysqld かその他のプロセスがメモリーを全て使っているか確認してください. メモリーを使い切っていない場合、'ulimit' を設定して mysqld のメモリー使用限界量を多くするか、swap space を増やしてみてください",
est "Mälu sai otsa. Võimalik, et aitab swap-i lisamine või käsu 'ulimit' abil MariaDB-le rohkema mälu kasutamise lubamine"
fre "Manque de 'threads'/mémoire"
ger "Kein Speicher mehr vorhanden. Prüfen Sie, ob mysqld oder ein anderer Prozess den gesamten Speicher verbraucht. Wenn nicht, sollten Sie mit 'ulimit' dafür sorgen, dass mysqld mehr Speicher benutzen darf, oder mehr Swap-Speicher einrichten"
greek "Πρόβλημα με τη διαθέσιμη μνήμη (Out of thread space/memory)"
hun "Elfogyott a thread-memoria"
ita "Fine dello spazio/memoria per i thread"
- jpn "Out of memory; mysqld かその他のプロセスがメモリーを全て使っているか確認してください. メモリーを使い切っていない場合、'ulimit' を設定して mysqld のメモリー使用限界量を多くするか、swap space を増やしてみてください"
+ jpn "メモリが不足しています。mysqld やその他のプロセスがメモリーを使い切っていないか確認して下さい。メモリーを使い切っていない場合、'ulimit'の設定等で mysqld のメモリー使用最大量を多くするか、スワップ領域を増やす必要があるかもしれません。"
# This message failed to convert from euc-kr, skipped
nor "Tomt for tråd plass/minne"
norwegian-ny "Tomt for tråd plass/minne"
@@ -994,18 +963,17 @@ ER_OUT_OF_RESOURCES
swe "Fick slut på minnet. Kontrollera om mysqld eller någon annan process använder allt tillgängligt minne. Om inte, försök använda 'ulimit' eller allokera mera swap"
ukr "Брак пам'яті; Перевірте чи mysqld або якісь інші процеси використовують усю доступну пам'ять. Як ні, то ви можете скористатися 'ulimit', аби дозволити mysqld використовувати більше пам'яті або ви можете додати більше місця під свап"
ER_BAD_HOST_ERROR 08S01
- cze "Nemohu zjistit jm-Béno stroje pro Vaši adresu"
+ cze "Nemohu zjistit jméno stroje pro Vaši adresu"
dan "Kan ikke få værtsnavn for din adresse"
nla "Kan de hostname niet krijgen van uw adres"
eng "Can't get hostname for your address"
- jps "その address の hostname が引けません.",
est "Ei suuda lahendada IP aadressi masina nimeks"
fre "Ne peut obtenir de hostname pour votre adresse"
ger "Kann Hostnamen für diese Adresse nicht erhalten"
greek "Δεν έγινε γνωστό το hostname για την address σας"
hun "A gepnev nem allapithato meg a cimbol"
ita "Impossibile risalire al nome dell'host dall'indirizzo (risoluzione inversa)"
- jpn "その address の hostname が引けません."
+ jpn "IPアドレスからホスト名を解決できません。"
kor "당신의 컴퓨터의 호스트이름을 얻을 수 없읍니다."
nor "Kan ikke få tak i vertsnavn for din adresse"
norwegian-ny "Kan ikkje få tak i vertsnavn for di adresse"
@@ -1019,7 +987,7 @@ ER_BAD_HOST_ERROR 08S01
swe "Kan inte hitta 'hostname' för din adress"
ukr "Не можу визначити ім'я хосту для вашої адреси"
ER_HANDSHAKE_ERROR 08S01
- cze "Chyba p-Bři ustavování spojení"
+ cze "Chyba při ustavování spojení"
dan "Forkert håndtryk (handshake)"
nla "Verkeerde handshake"
eng "Bad handshake"
@@ -1029,6 +997,7 @@ ER_HANDSHAKE_ERROR 08S01
greek "Η αναγνώριση (handshake) δεν έγινε σωστά"
hun "A kapcsolatfelvetel nem sikerult (Bad handshake)"
ita "Negoziazione impossibile"
+ jpn "ハンドシェイクエラー"
nor "Feil håndtrykk (handshake)"
norwegian-ny "Feil handtrykk (handshake)"
pol "Zły uchwyt(handshake)"
@@ -1041,18 +1010,17 @@ ER_HANDSHAKE_ERROR 08S01
swe "Fel vid initiering av kommunikationen med klienten"
ukr "Невірна установка зв'язку"
ER_DBACCESS_DENIED_ERROR 42000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s' k databázi '%-.192s' není povolen"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s' k databázi '%-.192s' není povolen"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s' til databasen '%-.192s'"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s' naar database '%-.192s'"
eng "Access denied for user '%-.48s'@'%-.64s' to database '%-.192s'"
- jps "ユーザー '%-.48s'@'%-.64s' の '%-.192s' データベースへのアクセスを拒否します",
est "Ligipääs keelatud kasutajale '%-.48s'@'%-.64s' andmebaasile '%-.192s'"
fre "Accès refusé pour l'utilisateur: '%-.48s'@'@%-.64s'. Base '%-.192s'"
ger "Benutzer '%-.48s'@'%-.64s' hat keine Zugriffsberechtigung für Datenbank '%-.192s'"
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s' στη βάση δεδομένων '%-.192s'"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres az '%-.192s' adabazishoz."
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s' al database '%-.192s'"
- jpn "ユーザー '%-.48s'@'%-.64s' の '%-.192s' データベースへのアクセスを拒否します"
+ jpn "ユーザー '%-.48s'@'%-.64s' によるデータベース '%-.192s' へのアクセスは拒否されました。"
kor "'%-.48s'@'%-.64s' 사용자는 '%-.192s' 데이타베이스에 접근이 거부 되었습니다."
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s' til databasen '%-.192s' nektet"
norwegian-ny "Tilgang ikkje tillate for brukar: '%-.48s'@'%-.64s' til databasen '%-.192s' nekta"
@@ -1065,18 +1033,17 @@ ER_DBACCESS_DENIED_ERROR 42000
swe "Användare '%-.48s'@'%-.64s' är ej berättigad att använda databasen %-.192s"
ukr "Доступ заборонено для користувача: '%-.48s'@'%-.64s' до бази данних '%-.192s'"
ER_ACCESS_DENIED_ERROR 28000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s' (s heslem %s)"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s' (s heslem %s)"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s' (Bruger adgangskode: %s)"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s' (Wachtwoord gebruikt: %s)"
eng "Access denied for user '%-.48s'@'%-.64s' (using password: %s)"
- jps "ユーザー '%-.48s'@'%-.64s' を拒否します.uUsing password: %s)",
est "Ligipääs keelatud kasutajale '%-.48s'@'%-.64s' (kasutab parooli: %s)"
fre "Accès refusé pour l'utilisateur: '%-.48s'@'@%-.64s' (mot de passe: %s)"
ger "Benutzer '%-.48s'@'%-.64s' hat keine Zugriffsberechtigung (verwendetes Passwort: %s)"
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s' (χρήση password: %s)"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres. (Hasznalja a jelszot: %s)"
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s' (Password: %s)"
- jpn "ユーザー '%-.48s'@'%-.64s' を拒否します.uUsing password: %s)"
+ jpn "ユーザー '%-.48s'@'%-.64s' のアクセスは拒否されました。(using password: %s)"
kor "'%-.48s'@'%-.64s' 사용자는 접근이 거부 되었습니다. (using password: %s)"
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s' (Bruker passord: %s)"
norwegian-ny "Tilgang ikke tillate for brukar: '%-.48s'@'%-.64s' (Brukar passord: %s)"
@@ -1089,18 +1056,17 @@ ER_ACCESS_DENIED_ERROR 28000
swe "Användare '%-.48s'@'%-.64s' är ej berättigad att logga in (Använder lösen: %s)"
ukr "Доступ заборонено для користувача: '%-.48s'@'%-.64s' (Використано пароль: %s)"
ER_NO_DB_ERROR 3D000
- cze "Nebyla vybr-Bána žádná databáze"
+ cze "Nebyla vybrána žádná databáze"
dan "Ingen database valgt"
nla "Geen database geselecteerd"
eng "No database selected"
- jps "データベースが選択されていません.",
est "Andmebaasi ei ole valitud"
fre "Aucune base n'a été sélectionnée"
ger "Keine Datenbank ausgewählt"
greek "Δεν επιλέχθηκε βάση δεδομένων"
hun "Nincs kivalasztott adatbazis"
ita "Nessun database selezionato"
- jpn "データベースが選択されていません."
+ jpn "データベースが選択されていません。"
kor "선택된 데이타베이스가 없습니다."
nor "Ingen database valgt"
norwegian-ny "Ingen database vald"
@@ -1114,18 +1080,17 @@ ER_NO_DB_ERROR 3D000
swe "Ingen databas i användning"
ukr "Базу данних не вибрано"
ER_UNKNOWN_COM_ERROR 08S01
- cze "Nezn-Bámý příkaz"
+ cze "Neznámý příkaz"
dan "Ukendt kommando"
nla "Onbekend commando"
eng "Unknown command"
- jps "そのコマンドは何?",
est "Tundmatu käsk"
fre "Commande inconnue"
ger "Unbekannter Befehl"
greek "Αγνωστη εντολή"
hun "Ervenytelen parancs"
ita "Comando sconosciuto"
- jpn "そのコマンドは何?"
+ jpn "不明なコマンドです。"
kor "명령어가 뭔지 모르겠어요..."
nor "Ukjent kommando"
norwegian-ny "Ukjent kommando"
@@ -1136,21 +1101,20 @@ ER_UNKNOWN_COM_ERROR 08S01
serbian "Nepoznata komanda"
slo "Neznámy príkaz"
spa "Comando desconocido"
- swe "Okänt commando"
+ swe "Okänt kommando"
ukr "Невідома команда"
ER_BAD_NULL_ERROR 23000
- cze "Sloupec '%-.192s' nem-Bůže být null"
+ cze "Sloupec '%-.192s' nemůže být null"
dan "Kolonne '%-.192s' kan ikke være NULL"
nla "Kolom '%-.192s' kan niet null zijn"
eng "Column '%-.192s' cannot be null"
- jps "Column '%-.192s' は null にはできないのです",
est "Tulp '%-.192s' ei saa omada nullväärtust"
fre "Le champ '%-.192s' ne peut être vide (null)"
ger "Feld '%-.192s' darf nicht NULL sein"
greek "Το πεδίο '%-.192s' δεν μπορεί να είναι κενό (null)"
hun "A(z) '%-.192s' oszlop erteke nem lehet nulla"
ita "La colonna '%-.192s' non puo` essere nulla"
- jpn "Column '%-.192s' は null にはできないのです"
+ jpn "列 '%-.192s' は null にできません。"
kor "칼럼 '%-.192s'는 널(Null)이 되면 안됩니다. "
nor "Kolonne '%-.192s' kan ikke vere null"
norwegian-ny "Kolonne '%-.192s' kan ikkje vere null"
@@ -1164,18 +1128,17 @@ ER_BAD_NULL_ERROR 23000
swe "Kolumn '%-.192s' får inte vara NULL"
ukr "Стовбець '%-.192s' не може бути нульовим"
ER_BAD_DB_ERROR 42000
- cze "Nezn-Bámá databáze '%-.192s'"
+ cze "Neznámá databáze '%-.192s'"
dan "Ukendt database '%-.192s'"
nla "Onbekende database '%-.192s'"
eng "Unknown database '%-.192s'"
- jps "'%-.192s' なんてデータベースは知りません.",
est "Tundmatu andmebaas '%-.192s'"
fre "Base '%-.192s' inconnue"
ger "Unbekannte Datenbank '%-.192s'"
greek "Αγνωστη βάση δεδομένων '%-.192s'"
hun "Ervenytelen adatbazis: '%-.192s'"
ita "Database '%-.192s' sconosciuto"
- jpn "'%-.192s' なんてデータベースは知りません."
+ jpn "'%-.192s' は不明なデータベースです。"
kor "데이타베이스 '%-.192s'는 알수 없음"
nor "Ukjent database '%-.192s'"
norwegian-ny "Ukjent database '%-.192s'"
@@ -1189,18 +1152,17 @@ ER_BAD_DB_ERROR 42000
swe "Okänd databas: '%-.192s'"
ukr "Невідома база данних '%-.192s'"
ER_TABLE_EXISTS_ERROR 42S01
- cze "Tabulka '%-.192s' ji-Bž existuje"
+ cze "Tabulka '%-.192s' již existuje"
dan "Tabellen '%-.192s' findes allerede"
nla "Tabel '%-.192s' bestaat al"
eng "Table '%-.192s' already exists"
- jps "Table '%-.192s' は既にあります",
est "Tabel '%-.192s' juba eksisteerib"
fre "La table '%-.192s' existe déjà"
ger "Tabelle '%-.192s' bereits vorhanden"
greek "Ο πίνακας '%-.192s' υπάρχει ήδη"
hun "A(z) '%-.192s' tabla mar letezik"
ita "La tabella '%-.192s' esiste gia`"
- jpn "Table '%-.192s' は既にあります"
+ jpn "表 '%-.192s' はすでに存在します。"
kor "테이블 '%-.192s'는 이미 존재함"
nor "Tabellen '%-.192s' eksisterer allerede"
norwegian-ny "Tabellen '%-.192s' eksisterar allereide"
@@ -1214,18 +1176,17 @@ ER_TABLE_EXISTS_ERROR 42S01
swe "Tabellen '%-.192s' finns redan"
ukr "Таблиця '%-.192s' вже існує"
ER_BAD_TABLE_ERROR 42S02
- cze "Nezn-Bámá tabulka '%-.100s'"
+ cze "Neznámá tabulka '%-.100s'"
dan "Ukendt tabel '%-.100s'"
nla "Onbekende tabel '%-.100s'"
eng "Unknown table '%-.100s'"
- jps "table '%-.100s' はありません.",
est "Tundmatu tabel '%-.100s'"
fre "Table '%-.100s' inconnue"
ger "Unbekannte Tabelle '%-.100s'"
greek "Αγνωστος πίνακας '%-.100s'"
hun "Ervenytelen tabla: '%-.100s'"
ita "Tabella '%-.100s' sconosciuta"
- jpn "table '%-.100s' はありません."
+ jpn "'%-.100s' は不明な表です。"
kor "테이블 '%-.100s'는 알수 없음"
nor "Ukjent tabell '%-.100s'"
norwegian-ny "Ukjent tabell '%-.100s'"
@@ -1239,7 +1200,7 @@ ER_BAD_TABLE_ERROR 42S02
swe "Okänd tabell '%-.100s'"
ukr "Невідома таблиця '%-.100s'"
ER_NON_UNIQ_ERROR 23000
- cze "Sloupec '%-.192s' v %-.192s nen-Bí zcela jasný"
+ cze "Sloupec '%-.192s' v %-.192s není zcela jasný"
dan "Felt: '%-.192s' i tabel %-.192s er ikke entydigt"
nla "Kolom: '%-.192s' in %-.192s is niet eenduidig"
eng "Column '%-.192s' in %-.192s is ambiguous"
@@ -1249,7 +1210,7 @@ ER_NON_UNIQ_ERROR 23000
greek "Το πεδίο: '%-.192s' σε %-.192s δεν έχει καθοριστεί"
hun "A(z) '%-.192s' oszlop %-.192s-ben ketertelmu"
ita "Colonna: '%-.192s' di %-.192s e` ambigua"
- jpn "Column: '%-.192s' in %-.192s is ambiguous"
+ jpn "列 '%-.192s' は %-.192s 内で曖昧です。"
kor "칼럼: '%-.192s' in '%-.192s' 이 모호함"
nor "Felt: '%-.192s' i tabell %-.192s er ikke entydig"
norwegian-ny "Kolonne: '%-.192s' i tabell %-.192s er ikkje eintydig"
@@ -1263,18 +1224,17 @@ ER_NON_UNIQ_ERROR 23000
swe "Kolumn '%-.192s' i %-.192s är inte unik"
ukr "Стовбець '%-.192s' у %-.192s визначений неоднозначно"
ER_SERVER_SHUTDOWN 08S01
- cze "Prob-Bíhá ukončování práce serveru"
+ cze "Probíhá ukončování práce serveru"
dan "Database nedlukning er i gang"
nla "Bezig met het stoppen van de server"
eng "Server shutdown in progress"
- jps "Server を shutdown 中...",
est "Serveri seiskamine käib"
fre "Arrêt du serveur en cours"
ger "Der Server wird heruntergefahren"
greek "Εναρξη διαδικασίας αποσύνδεσης του εξυπηρετητή (server shutdown)"
hun "A szerver leallitasa folyamatban"
ita "Shutdown del server in corso"
- jpn "Server を shutdown 中..."
+ jpn "サーバーをシャットダウン中です。"
kor "Server가 셧다운 중입니다."
nor "Database nedkobling er i gang"
norwegian-ny "Tenar nedkopling er i gang"
@@ -1288,18 +1248,17 @@ ER_SERVER_SHUTDOWN 08S01
swe "Servern går nu ned"
ukr "Завершується работа сервера"
ER_BAD_FIELD_ERROR 42S22 S0022
- cze "Nezn-Bámý sloupec '%-.192s' v %-.192s"
+ cze "Neznámý sloupec '%-.192s' v %-.192s"
dan "Ukendt kolonne '%-.192s' i tabel %-.192s"
nla "Onbekende kolom '%-.192s' in %-.192s"
eng "Unknown column '%-.192s' in '%-.192s'"
- jps "'%-.192s' column は '%-.192s' にはありません.",
est "Tundmatu tulp '%-.192s' '%-.192s'-s"
fre "Champ '%-.192s' inconnu dans %-.192s"
ger "Unbekanntes Tabellenfeld '%-.192s' in %-.192s"
greek "Αγνωστο πεδίο '%-.192s' σε '%-.192s'"
hun "A(z) '%-.192s' oszlop ervenytelen '%-.192s'-ben"
ita "Colonna sconosciuta '%-.192s' in '%-.192s'"
- jpn "'%-.192s' column は '%-.192s' にはありません."
+ jpn "列 '%-.192s' は '%-.192s' にはありません。"
kor "Unknown 칼럼 '%-.192s' in '%-.192s'"
nor "Ukjent kolonne '%-.192s' i tabell %-.192s"
norwegian-ny "Ukjent felt '%-.192s' i tabell %-.192s"
@@ -1313,17 +1272,17 @@ ER_BAD_FIELD_ERROR 42S22 S0022
swe "Okänd kolumn '%-.192s' i %-.192s"
ukr "Невідомий стовбець '%-.192s' у '%-.192s'"
ER_WRONG_FIELD_WITH_GROUP 42000 S1009
- cze "Pou-Bžité '%-.192s' nebylo v group by"
+ cze "Použité '%-.192s' nebylo v group by"
dan "Brugte '%-.192s' som ikke var i group by"
nla "Opdracht gebruikt '%-.192s' dat niet in de GROUP BY voorkomt"
eng "'%-.192s' isn't in GROUP BY"
- jps "'%-.192s' isn't in GROUP BY",
est "'%-.192s' puudub GROUP BY klauslis"
fre "'%-.192s' n'est pas dans 'group by'"
ger "'%-.192s' ist nicht in GROUP BY vorhanden"
greek "Χρησιμοποιήθηκε '%-.192s' που δεν υπήρχε στο group by"
hun "Used '%-.192s' with wasn't in group by"
ita "Usato '%-.192s' che non e` nel GROUP BY"
+ jpn "'%-.192s' はGROUP BY句で指定されていません。"
kor "'%-.192s'은 GROUP BY속에 없음"
nor "Brukte '%-.192s' som ikke var i group by"
norwegian-ny "Brukte '%-.192s' som ikkje var i group by"
@@ -1337,7 +1296,7 @@ ER_WRONG_FIELD_WITH_GROUP 42000 S1009
swe "'%-.192s' finns inte i GROUP BY"
ukr "'%-.192s' не є у GROUP BY"
ER_WRONG_GROUP_FIELD 42000 S1009
- cze "Nemohu pou-Bžít group na '%-.192s'"
+ cze "Nemohu použít group na '%-.192s'"
dan "Kan ikke gruppere på '%-.192s'"
nla "Kan '%-.192s' niet groeperen"
eng "Can't group on '%-.192s'"
@@ -1347,6 +1306,7 @@ ER_WRONG_GROUP_FIELD 42000 S1009
greek "Αδύνατη η ομαδοποίηση (group on) '%-.192s'"
hun "A group nem hasznalhato: '%-.192s'"
ita "Impossibile raggruppare per '%-.192s'"
+ jpn "'%-.192s' でのグループ化はできません。"
kor "'%-.192s'를 그룹할 수 없음"
nor "Kan ikke gruppere på '%-.192s'"
norwegian-ny "Kan ikkje gruppere på '%-.192s'"
@@ -1360,7 +1320,7 @@ ER_WRONG_GROUP_FIELD 42000 S1009
swe "Kan inte använda GROUP BY med '%-.192s'"
ukr "Не можу групувати по '%-.192s'"
ER_WRONG_SUM_SELECT 42000 S1009
- cze "P-Bříkaz obsahuje zároveň funkci sum a sloupce"
+ cze "Příkaz obsahuje zároveň funkci sum a sloupce"
dan "Udtrykket har summer (sum) funktioner og kolonner i samme udtryk"
nla "Opdracht heeft totaliseer functies en kolommen in dezelfde opdracht"
eng "Statement has sum functions and columns in same statement"
@@ -1369,6 +1329,7 @@ ER_WRONG_SUM_SELECT 42000 S1009
ger "Die Verwendung von Summierungsfunktionen und Spalten im selben Befehl ist nicht erlaubt"
greek "Η διατύπωση περιέχει sum functions και columns στην ίδια διατύπωση"
ita "Il comando ha una funzione SUM e una colonna non specificata nella GROUP BY"
+ jpn "集計関数と通常の列が同時に指定されています。"
kor "Statement 가 sum기능을 동작중이고 칼럼도 동일한 statement입니다."
nor "Uttrykket har summer (sum) funksjoner og kolonner i samme uttrykk"
norwegian-ny "Uttrykket har summer (sum) funksjoner og kolonner i same uttrykk"
@@ -1382,7 +1343,7 @@ ER_WRONG_SUM_SELECT 42000 S1009
swe "Kommandot har både sum functions och enkla funktioner"
ukr "У виразі використано підсумовуючі функції поряд з іменами стовбців"
ER_WRONG_VALUE_COUNT 21S01
- cze "Po-Bčet sloupců neodpovídá zadané hodnotě"
+ cze "Počet sloupců neodpovídá zadané hodnotě"
dan "Kolonne tæller stemmer ikke med antallet af værdier"
nla "Het aantal kolommen komt niet overeen met het aantal opgegeven waardes"
eng "Column count doesn't match value count"
@@ -1391,6 +1352,7 @@ ER_WRONG_VALUE_COUNT 21S01
greek "Το Column count δεν ταιριάζει με το value count"
hun "Az oszlopban levo ertek nem egyezik meg a szamitott ertekkel"
ita "Il numero delle colonne non e` uguale al numero dei valori"
+ jpn "列数が値の個数と一致しません。"
kor "칼럼의 카운트가 값의 카운트와 일치하지 않습니다."
nor "Felt telling stemmer verdi telling"
norwegian-ny "Kolonne telling stemmer verdi telling"
@@ -1404,18 +1366,17 @@ ER_WRONG_VALUE_COUNT 21S01
swe "Antalet kolumner motsvarar inte antalet värden"
ukr "Кількість стовбців не співпадає з кількістю значень"
ER_TOO_LONG_IDENT 42000 S1009
- cze "Jm-Béno identifikátoru '%-.100s' je příliš dlouhé"
+ cze "Jméno identifikátoru '%-.100s' je příliš dlouhé"
dan "Navnet '%-.100s' er for langt"
nla "Naam voor herkenning '%-.100s' is te lang"
eng "Identifier name '%-.100s' is too long"
- jps "Identifier name '%-.100s' は長すぎます",
est "Identifikaatori '%-.100s' nimi on liiga pikk"
fre "Le nom de l'identificateur '%-.100s' est trop long"
ger "Name des Bezeichners '%-.100s' ist zu lang"
greek "Το identifier name '%-.100s' είναι πολύ μεγάλο"
hun "A(z) '%-.100s' azonositonev tul hosszu."
ita "Il nome dell'identificatore '%-.100s' e` troppo lungo"
- jpn "Identifier name '%-.100s' は長すぎます"
+ jpn "識別子名 '%-.100s' は長すぎます。"
kor "Identifier '%-.100s'는 너무 길군요."
nor "Identifikator '%-.100s' er for lang"
norwegian-ny "Identifikator '%-.100s' er for lang"
@@ -1429,18 +1390,17 @@ ER_TOO_LONG_IDENT 42000 S1009
swe "Kolumnnamn '%-.100s' är för långt"
ukr "Ім'я ідентифікатора '%-.100s' задовге"
ER_DUP_FIELDNAME 42S21 S1009
- cze "Zdvojen-Bé jméno sloupce '%-.192s'"
+ cze "Zdvojené jméno sloupce '%-.192s'"
dan "Feltnavnet '%-.192s' findes allerede"
nla "Dubbele kolom naam '%-.192s'"
eng "Duplicate column name '%-.192s'"
- jps "'%-.192s' という column 名は重複してます",
est "Kattuv tulba nimi '%-.192s'"
fre "Nom du champ '%-.192s' déjà utilisé"
ger "Doppelter Spaltenname: '%-.192s'"
greek "Επανάληψη column name '%-.192s'"
hun "Duplikalt oszlopazonosito: '%-.192s'"
ita "Nome colonna duplicato '%-.192s'"
- jpn "'%-.192s' という column 名は重複してます"
+ jpn "列名 '%-.192s' は重複してます。"
kor "중복된 칼럼 이름: '%-.192s'"
nor "Feltnavnet '%-.192s' eksisterte fra før"
norwegian-ny "Feltnamnet '%-.192s' eksisterte frå før"
@@ -1454,18 +1414,17 @@ ER_DUP_FIELDNAME 42S21 S1009
swe "Kolumnnamn '%-.192s finns flera gånger"
ukr "Дублююче ім'я стовбця '%-.192s'"
ER_DUP_KEYNAME 42000 S1009
- cze "Zdvojen-Bé jméno klíče '%-.192s'"
+ cze "Zdvojené jméno klíče '%-.192s'"
dan "Indeksnavnet '%-.192s' findes allerede"
nla "Dubbele zoeksleutel naam '%-.192s'"
eng "Duplicate key name '%-.192s'"
- jps "'%-.192s' という key の名前は重複しています",
est "Kattuv võtme nimi '%-.192s'"
fre "Nom de clef '%-.192s' déjà utilisé"
ger "Doppelter Name für Schlüssel vorhanden: '%-.192s'"
greek "Επανάληψη key name '%-.192s'"
hun "Duplikalt kulcsazonosito: '%-.192s'"
ita "Nome chiave duplicato '%-.192s'"
- jpn "'%-.192s' という key の名前は重複しています"
+ jpn "索引名 '%-.192s' は重複しています。"
kor "중복된 키 이름 : '%-.192s'"
nor "Nøkkelnavnet '%-.192s' eksisterte fra før"
norwegian-ny "Nøkkelnamnet '%-.192s' eksisterte frå før"
@@ -1481,32 +1440,31 @@ ER_DUP_KEYNAME 42000 S1009
# When using this error code, please use ER(ER_DUP_ENTRY_WITH_KEY_NAME)
# for the message string. See, for example, code in handler.cc.
ER_DUP_ENTRY 23000 S1009
- cze "Zdvojen-Bý klíč '%-.192s' (číslo klíče %d)"
+ cze "Zdvojený klíč '%-.192s' (číslo klíče %d)"
dan "Ens værdier '%-.192s' for indeks %d"
nla "Dubbele ingang '%-.192s' voor zoeksleutel %d"
eng "Duplicate entry '%-.192s' for key %d"
- jps "'%-.192s' は key %d において重複しています",
est "Kattuv väärtus '%-.192s' võtmele %d"
fre "Duplicata du champ '%-.192s' pour la clef %d"
ger "Doppelter Eintrag '%-.192s' für Schlüssel %d"
greek "Διπλή εγγραφή '%-.192s' για το κλειδί %d"
hun "Duplikalt bejegyzes '%-.192s' a %d kulcs szerint."
ita "Valore duplicato '%-.192s' per la chiave %d"
- jpn "'%-.192s' は key %d において重複しています"
+ jpn "'%-.192s' は索引 %d で重複しています。"
kor "중복된 입력 값 '%-.192s': key %d"
nor "Like verdier '%-.192s' for nøkkel %d"
norwegian-ny "Like verdiar '%-.192s' for nykkel %d"
- pol "Powtórzone wyst?pienie '%-.192s' dla klucza %d"
+ pol "Powtórzone wystąpienie '%-.192s' dla klucza %d"
por "Entrada '%-.192s' duplicada para a chave %d"
rum "Cimpul '%-.192s' e duplicat pentru cheia %d"
rus "Дублирующаяся запись '%-.192s' по ключу %d"
serbian "Dupliran unos '%-.192s' za ključ '%d'"
slo "Opakovaný kľúč '%-.192s' (číslo kľúča %d)"
spa "Entrada duplicada '%-.192s' para la clave %d"
- swe "Dubbel nyckel '%-.192s' för nyckel %d"
+ swe "Dublett '%-.192s' för nyckel %d"
ukr "Дублюючий запис '%-.192s' для ключа %d"
ER_WRONG_FIELD_SPEC 42000 S1009
- cze "Chybn-Bá specifikace sloupce '%-.192s'"
+ cze "Chybná specifikace sloupce '%-.192s'"
dan "Forkert kolonnespecifikaton for felt '%-.192s'"
nla "Verkeerde kolom specificatie voor kolom '%-.192s'"
eng "Incorrect column specifier for column '%-.192s'"
@@ -1516,6 +1474,7 @@ ER_WRONG_FIELD_SPEC 42000 S1009
greek "Εσφαλμένο column specifier για το πεδίο '%-.192s'"
hun "Rossz oszlopazonosito: '%-.192s'"
ita "Specifica errata per la colonna '%-.192s'"
+ jpn "列 '%-.192s' の定義が不正です。"
kor "칼럼 '%-.192s'의 부정확한 칼럼 정의자"
nor "Feil kolonne spesifikator for felt '%-.192s'"
norwegian-ny "Feil kolonne spesifikator for kolonne '%-.192s'"
@@ -1529,18 +1488,17 @@ ER_WRONG_FIELD_SPEC 42000 S1009
swe "Felaktigt kolumntyp för kolumn '%-.192s'"
ukr "Невірний специфікатор стовбця '%-.192s'"
ER_PARSE_ERROR 42000 s1009
- cze "%s bl-Bízko '%-.80s' na řádku %d"
+ cze "%s blízko '%-.80s' na řádku %d"
dan "%s nær '%-.80s' på linje %d"
nla "%s bij '%-.80s' in regel %d"
eng "%s near '%-.80s' at line %d"
- jps "%s : '%-.80s' 付近 : %d 行目",
est "%s '%-.80s' ligidal real %d"
fre "%s près de '%-.80s' à la ligne %d"
ger "%s bei '%-.80s' in Zeile %d"
greek "%s πλησίον '%-.80s' στη γραμμή %d"
hun "A %s a '%-.80s'-hez kozeli a %d sorban"
ita "%s vicino a '%-.80s' linea %d"
- jpn "%s : '%-.80s' 付近 : %d 行目"
+ jpn "%s : '%-.80s' 付近 %d 行目"
kor "'%s' 에러 같읍니다. ('%-.80s' 명령어 라인 %d)"
nor "%s nær '%-.80s' på linje %d"
norwegian-ny "%s attmed '%-.80s' på line %d"
@@ -1554,18 +1512,17 @@ ER_PARSE_ERROR 42000 s1009
swe "%s nära '%-.80s' på rad %d"
ukr "%s біля '%-.80s' в строці %d"
ER_EMPTY_QUERY 42000
- cze "V-Býsledek dotazu je prázdný"
+ cze "Výsledek dotazu je prázdný"
dan "Forespørgsel var tom"
nla "Query was leeg"
eng "Query was empty"
- jps "Query が空です.",
est "Tühi päring"
fre "Query est vide"
ger "Leere Abfrage"
greek "Το ερώτημα (query) που θέσατε ήταν κενό"
hun "Ures lekerdezes."
ita "La query e` vuota"
- jpn "Query が空です."
+ jpn "クエリが空です。"
kor "쿼리결과가 없습니다."
nor "Forespørsel var tom"
norwegian-ny "Førespurnad var tom"
@@ -1579,18 +1536,17 @@ ER_EMPTY_QUERY 42000
swe "Frågan var tom"
ukr "Пустий запит"
ER_NONUNIQ_TABLE 42000 S1009
- cze "Nejednozna-Bčná tabulka/alias: '%-.192s'"
+ cze "Nejednoznačná tabulka/alias: '%-.192s'"
dan "Tabellen/aliaset: '%-.192s' er ikke unikt"
nla "Niet unieke waarde tabel/alias: '%-.192s'"
eng "Not unique table/alias: '%-.192s'"
- jps "'%-.192s' は一意の table/alias 名ではありません",
est "Ei ole unikaalne tabel/alias '%-.192s'"
fre "Table/alias: '%-.192s' non unique"
ger "Tabellenname/Alias '%-.192s' nicht eindeutig"
greek "Αδύνατη η ανεύρεση unique table/alias: '%-.192s'"
hun "Nem egyedi tabla/alias: '%-.192s'"
ita "Tabella/alias non unico: '%-.192s'"
- jpn "'%-.192s' は一意の table/alias 名ではありません"
+ jpn "表名/別名 '%-.192s' は一意ではありません。"
kor "Unique 하지 않은 테이블/alias: '%-.192s'"
nor "Ikke unikt tabell/alias: '%-.192s'"
norwegian-ny "Ikkje unikt tabell/alias: '%-.192s'"
@@ -1604,7 +1560,7 @@ ER_NONUNIQ_TABLE 42000 S1009
swe "Icke unikt tabell/alias: '%-.192s'"
ukr "Неунікальна таблиця/псевдонім: '%-.192s'"
ER_INVALID_DEFAULT 42000 S1009
- cze "Chybn-Bá defaultní hodnota pro '%-.192s'"
+ cze "Chybná defaultní hodnota pro '%-.192s'"
dan "Ugyldig standardværdi for '%-.192s'"
nla "Foutieve standaard waarde voor '%-.192s'"
eng "Invalid default value for '%-.192s'"
@@ -1614,6 +1570,7 @@ ER_INVALID_DEFAULT 42000 S1009
greek "Εσφαλμένη προκαθορισμένη τιμή (default value) για '%-.192s'"
hun "Ervenytelen ertek: '%-.192s'"
ita "Valore di default non valido per '%-.192s'"
+ jpn "'%-.192s' へのデフォルト値が無効です。"
kor "'%-.192s'의 유효하지 못한 디폴트 값을 사용하셨습니다."
nor "Ugyldig standardverdi for '%-.192s'"
norwegian-ny "Ugyldig standardverdi for '%-.192s'"
@@ -1627,18 +1584,17 @@ ER_INVALID_DEFAULT 42000 S1009
swe "Ogiltigt DEFAULT värde för '%-.192s'"
ukr "Невірне значення по замовчуванню для '%-.192s'"
ER_MULTIPLE_PRI_KEY 42000 S1009
- cze "Definov-Báno více primárních klíčů"
+ cze "Definováno více primárních klíčů"
dan "Flere primærnøgler specificeret"
nla "Meerdere primaire zoeksleutels gedefinieerd"
eng "Multiple primary key defined"
- jps "複数の primary key が定義されました",
est "Mitut primaarset võtit ei saa olla"
fre "Plusieurs clefs primaires définies"
ger "Mehrere Primärschlüssel (PRIMARY KEY) definiert"
greek "Περισσότερα από ένα primary key ορίστηκαν"
hun "Tobbszoros elsodleges kulcs definialas."
ita "Definite piu` chiave primarie"
- jpn "複数の primary key が定義されました"
+ jpn "PRIMARY KEY が複数定義されています。"
kor "Multiple primary key가 정의되어 있슴"
nor "Fleire primærnøkle spesifisert"
norwegian-ny "Fleire primærnyklar spesifisert"
@@ -1652,18 +1608,17 @@ ER_MULTIPLE_PRI_KEY 42000 S1009
swe "Flera PRIMARY KEY använda"
ukr "Первинного ключа визначено неодноразово"
ER_TOO_MANY_KEYS 42000 S1009
- cze "Zad-Báno příliš mnoho klíčů, je povoleno nejvíce %d klíčů"
+ cze "Zadáno příliš mnoho klíčů, je povoleno nejvíce %d klíčů"
dan "For mange nøgler specificeret. Kun %d nøgler må bruges"
nla "Teveel zoeksleutels gedefinieerd. Maximaal zijn %d zoeksleutels toegestaan"
eng "Too many keys specified; max %d keys allowed"
- jps "key の指定が多すぎます. key は最大 %d までです",
est "Liiga palju võtmeid. Maksimaalselt võib olla %d võtit"
fre "Trop de clefs sont définies. Maximum de %d clefs alloué"
ger "Zu viele Schlüssel definiert. Maximal %d Schlüssel erlaubt"
greek "Πάρα πολλά key ορίσθηκαν. Το πολύ %d επιτρέπονται"
hun "Tul sok kulcs. Maximum %d kulcs engedelyezett."
ita "Troppe chiavi. Sono ammesse max %d chiavi"
- jpn "key の指定が多すぎます. key は最大 %d までです"
+ jpn "索引の数が多すぎます。最大 %d 個までです。"
kor "너무 많은 키가 정의되어 있읍니다.. 최대 %d의 키가 가능함"
nor "For mange nøkler spesifisert. Maks %d nøkler tillatt"
norwegian-ny "For mange nykler spesifisert. Maks %d nyklar tillatt"
@@ -1677,7 +1632,7 @@ ER_TOO_MANY_KEYS 42000 S1009
swe "För många nycklar använda. Man får ha högst %d nycklar"
ukr "Забагато ключів зазначено. Дозволено не більше %d ключів"
ER_TOO_MANY_KEY_PARTS 42000 S1009
- cze "Zad-Báno příliš mnoho část klíčů, je povoleno nejvíce %d částí"
+ cze "Zadáno příliš mnoho část klíčů, je povoleno nejvíce %d částí"
dan "For mange nøgledele specificeret. Kun %d dele må bruges"
nla "Teveel zoeksleutel onderdelen gespecificeerd. Maximaal %d onderdelen toegestaan"
eng "Too many key parts specified; max %d parts allowed"
@@ -1687,6 +1642,7 @@ ER_TOO_MANY_KEY_PARTS 42000 S1009
greek "Πάρα πολλά key parts ορίσθηκαν. Το πολύ %d επιτρέπονται"
hun "Tul sok kulcsdarabot definialt. Maximum %d resz engedelyezett"
ita "Troppe parti di chiave specificate. Sono ammesse max %d parti"
+ jpn "索引のキー列指定が多すぎます。最大 %d 個までです。"
kor "너무 많은 키 부분(parts)들이 정의되어 있읍니다.. 최대 %d 부분이 가능함"
nor "For mange nøkkeldeler spesifisert. Maks %d deler tillatt"
norwegian-ny "For mange nykkeldelar spesifisert. Maks %d delar tillatt"
@@ -1700,18 +1656,17 @@ ER_TOO_MANY_KEY_PARTS 42000 S1009
swe "För många nyckeldelar använda. Man får ha högst %d nyckeldelar"
ukr "Забагато частин ключа зазначено. Дозволено не більше %d частин"
ER_TOO_LONG_KEY 42000 S1009
- cze "Zadan-Bý klíč byl příliš dlouhý, největší délka klíče je %d"
+ cze "Zadaný klíč byl příliš dlouhý, největší délka klíče je %d"
dan "Specificeret nøgle var for lang. Maksimal nøglelængde er %d"
nla "Gespecificeerde zoeksleutel was te lang. De maximale lengte is %d"
eng "Specified key was too long; max key length is %d bytes"
- jps "key が長すぎます. key の長さは最大 %d です",
est "Võti on liiga pikk. Maksimaalne võtmepikkus on %d"
fre "La clé est trop longue. Longueur maximale: %d"
ger "Schlüssel ist zu lang. Die maximale Schlüssellänge beträgt %d"
greek "Το κλειδί που ορίσθηκε είναι πολύ μεγάλο. Το μέγιστο μήκος είναι %d"
hun "A megadott kulcs tul hosszu. Maximalis kulcshosszusag: %d"
ita "La chiave specificata e` troppo lunga. La max lunghezza della chiave e` %d"
- jpn "key が長すぎます. key の長さは最大 %d です"
+ jpn "索引のキーが長すぎます。最大 %d バイトまでです。"
kor "정의된 키가 너무 깁니다. 최대 키의 길이는 %d입니다."
nor "Spesifisert nøkkel var for lang. Maks nøkkellengde er is %d"
norwegian-ny "Spesifisert nykkel var for lang. Maks nykkellengde er %d"
@@ -1725,18 +1680,17 @@ ER_TOO_LONG_KEY 42000 S1009
swe "För lång nyckel. Högsta tillåtna nyckellängd är %d"
ukr "Зазначений ключ задовгий. Найбільша довжина ключа %d байтів"
ER_KEY_COLUMN_DOES_NOT_EXITS 42000 S1009
- cze "Kl-Bíčový sloupec '%-.192s' v tabulce neexistuje"
+ cze "Klíčový sloupec '%-.192s' v tabulce neexistuje"
dan "Nøglefeltet '%-.192s' eksisterer ikke i tabellen"
nla "Zoeksleutel kolom '%-.192s' bestaat niet in tabel"
eng "Key column '%-.192s' doesn't exist in table"
- jps "Key column '%-.192s' がテーブルにありません.",
est "Võtme tulp '%-.192s' puudub tabelis"
fre "La clé '%-.192s' n'existe pas dans la table"
ger "In der Tabelle gibt es kein Schlüsselfeld '%-.192s'"
greek "Το πεδίο κλειδί '%-.192s' δεν υπάρχει στον πίνακα"
hun "A(z) '%-.192s'kulcsoszlop nem letezik a tablaban"
ita "La colonna chiave '%-.192s' non esiste nella tabella"
- jpn "Key column '%-.192s' がテーブルにありません."
+ jpn "キー列 '%-.192s' は表にありません。"
kor "Key 칼럼 '%-.192s'는 테이블에 존재하지 않습니다."
nor "Nøkkel felt '%-.192s' eksiterer ikke i tabellen"
norwegian-ny "Nykkel kolonne '%-.192s' eksiterar ikkje i tabellen"
@@ -1750,7 +1704,7 @@ ER_KEY_COLUMN_DOES_NOT_EXITS 42000 S1009
swe "Nyckelkolumn '%-.192s' finns inte"
ukr "Ключовий стовбець '%-.192s' не існує у таблиці"
ER_BLOB_USED_AS_KEY 42000 S1009
- cze "Blob sloupec '%-.192s' nem-Bůže být použit jako klíč"
+ cze "Blob sloupec '%-.192s' nemůže být použit jako klíč"
dan "BLOB feltet '%-.192s' kan ikke bruges ved specifikation af indeks"
nla "BLOB kolom '%-.192s' kan niet gebruikt worden bij zoeksleutel specificatie"
eng "BLOB column '%-.192s' can't be used in key specification with the used table type"
@@ -1760,6 +1714,7 @@ ER_BLOB_USED_AS_KEY 42000 S1009
greek "Πεδίο τύπου Blob '%-.192s' δεν μπορεί να χρησιμοποιηθεί στον ορισμό ενός κλειδιού (key specification)"
hun "Blob objektum '%-.192s' nem hasznalhato kulcskent"
ita "La colonna BLOB '%-.192s' non puo` essere usata nella specifica della chiave"
+ jpn "指定されたストレージエンジンでは、BLOB列 '%-.192s' は索引キーにできません。"
kor "BLOB 칼럼 '%-.192s'는 키 정의에서 사용될 수 없습니다."
nor "Blob felt '%-.192s' kan ikke brukes ved spesifikasjon av nøkler"
norwegian-ny "Blob kolonne '%-.192s' kan ikkje brukast ved spesifikasjon av nyklar"
@@ -1773,18 +1728,17 @@ ER_BLOB_USED_AS_KEY 42000 S1009
swe "En BLOB '%-.192s' kan inte vara nyckel med den använda tabelltypen"
ukr "BLOB стовбець '%-.192s' не може бути використаний у визначенні ключа в цьому типі таблиці"
ER_TOO_BIG_FIELDLENGTH 42000 S1009
- cze "P-Bříliš velká délka sloupce '%-.192s' (nejvíce %lu). Použijte BLOB"
+ cze "Příliš velká délka sloupce '%-.192s' (nejvíce %lu). Použijte BLOB"
dan "For stor feltlængde for kolonne '%-.192s' (maks = %lu). Brug BLOB i stedet"
nla "Te grote kolomlengte voor '%-.192s' (max = %lu). Maak hiervoor gebruik van het type BLOB"
eng "Column length too big for column '%-.192s' (max = %lu); use BLOB or TEXT instead"
- jps "column '%-.192s' は,確保する column の大きさが多すぎます. (最大 %lu まで). BLOB をかわりに使用してください."
est "Tulba '%-.192s' pikkus on liiga pikk (maksimaalne pikkus: %lu). Kasuta BLOB väljatüüpi"
fre "Champ '%-.192s' trop long (max = %lu). Utilisez un BLOB"
ger "Feldlänge für Feld '%-.192s' zu groß (maximal %lu). BLOB- oder TEXT-Spaltentyp verwenden!"
greek "Πολύ μεγάλο μήκος για το πεδίο '%-.192s' (max = %lu). Παρακαλώ χρησιμοποιείστε τον τύπο BLOB"
hun "A(z) '%-.192s' oszlop tul hosszu. (maximum = %lu). Hasznaljon BLOB tipust inkabb."
ita "La colonna '%-.192s' e` troppo grande (max=%lu). Utilizza un BLOB."
- jpn "column '%-.192s' は,確保する column の大きさが多すぎます. (最大 %lu まで). BLOB をかわりに使用してください."
+ jpn "列 '%-.192s' のサイズ定義が大きすぎます (最大 %lu まで)。代わりに BLOB または TEXT を使用してください。"
kor "칼럼 '%-.192s'의 칼럼 길이가 너무 깁니다 (최대 = %lu). 대신에 BLOB를 사용하세요."
nor "For stor nøkkellengde for kolonne '%-.192s' (maks = %lu). Bruk BLOB istedenfor"
norwegian-ny "For stor nykkellengde for felt '%-.192s' (maks = %lu). Bruk BLOB istadenfor"
@@ -1798,18 +1752,17 @@ ER_TOO_BIG_FIELDLENGTH 42000 S1009
swe "För stor kolumnlängd angiven för '%-.192s' (max= %lu). Använd en BLOB instället"
ukr "Задовга довжина стовбця '%-.192s' (max = %lu). Використайте тип BLOB"
ER_WRONG_AUTO_KEY 42000 S1009
- cze "M-Bůžete mít pouze jedno AUTO pole a to musí být definováno jako klíč"
+ cze "Můžete mít pouze jedno AUTO pole a to musí být definováno jako klíč"
dan "Der kan kun specificeres eet AUTO_INCREMENT-felt, og det skal være indekseret"
nla "Er kan slechts 1 autofield zijn en deze moet als zoeksleutel worden gedefinieerd."
eng "Incorrect table definition; there can be only one auto column and it must be defined as a key"
- jps "テーブルの定義が違います; there can be only one auto column and it must be defined as a key",
est "Vigane tabelikirjeldus; Tabelis tohib olla üks auto_increment tüüpi tulp ning see peab olema defineeritud võtmena"
fre "Un seul champ automatique est permis et il doit être indexé"
ger "Falsche Tabellendefinition. Es darf nur eine AUTO_INCREMENT-Spalte geben, und diese muss als Schlüssel definiert werden"
greek "Μπορεί να υπάρχει μόνο ένα auto field και πρέπει να έχει ορισθεί σαν key"
hun "Csak egy auto mezo lehetseges, es azt kulcskent kell definialni."
ita "Puo` esserci solo un campo AUTO e deve essere definito come chiave"
- jpn "テーブルの定義が違います; there can be only one auto column and it must be defined as a key"
+ jpn "不正な表定義です。AUTO_INCREMENT列は1個までで、索引を定義する必要があります。"
kor "부정확한 테이블 정의; 테이블은 하나의 auto 칼럼이 존재하고 키로 정의되어져야 합니다."
nor "Bare ett auto felt kan være definert som nøkkel."
norwegian-ny "Bare eitt auto felt kan være definert som nøkkel."
@@ -1823,18 +1776,17 @@ ER_WRONG_AUTO_KEY 42000 S1009
swe "Det får finnas endast ett AUTO_INCREMENT-fält och detta måste vara en nyckel"
ukr "Невірне визначення таблиці; Може бути лише один автоматичний стовбець, що повинен бути визначений як ключ"
ER_READY
- cze "%s: p-Břipraven na spojení\nVersion: '%s' socket: '%s' port: %d""
+ cze "%s: připraven na spojení\nVersion: '%s' socket: '%s' port: %d""
dan "%s: klar til tilslutninger\nVersion: '%s' socket: '%s' port: %d""
nla "%s: klaar voor verbindingen\nVersion: '%s' socket: '%s' port: %d""
eng "%s: ready for connections.\nVersion: '%s' socket: '%s' port: %d"
- jps "%s: 準備完了¥nVersion: '%s' socket: '%s' port: %d"",
est "%s: ootab ühendusi\nVersion: '%s' socket: '%s' port: %d""
fre "%s: Prêt pour des connexions\nVersion: '%s' socket: '%s' port: %d""
ger "%s: Bereit für Verbindungen.\nVersion: '%s' Socket: '%s' Port: %d"
greek "%s: σε αναμονή συνδέσεων\nVersion: '%s' socket: '%s' port: %d""
hun "%s: kapcsolatra kesz\nVersion: '%s' socket: '%s' port: %d""
ita "%s: Pronto per le connessioni\nVersion: '%s' socket: '%s' port: %d""
- jpn "%s: 準備完了\nVersion: '%s' socket: '%s' port: %d""
+ jpn "%s: 接続準備完了。\nバージョン: '%s' socket: '%s' port: %d""
kor "%s: 연결 준비중입니다\nVersion: '%s' socket: '%s' port: %d""
nor "%s: klar for tilkoblinger\nVersion: '%s' socket: '%s' port: %d""
norwegian-ny "%s: klar for tilkoblingar\nVersion: '%s' socket: '%s' port: %d""
@@ -1848,7 +1800,7 @@ ER_READY
swe "%s: klar att ta emot klienter\nVersion: '%s' socket: '%s' port: %d""
ukr "%s: Готовий для з'єднань!\nVersion: '%s' socket: '%s' port: %d""
ER_NORMAL_SHUTDOWN
- cze "%s: norm-Bální ukončení\n"
+ cze "%s: normální ukončení\n"
dan "%s: Normal nedlukning\n"
nla "%s: Normaal afgesloten \n"
eng "%s: Normal shutdown\n"
@@ -1858,6 +1810,7 @@ ER_NORMAL_SHUTDOWN
greek "%s: Φυσιολογική διαδικασία shutdown\n"
hun "%s: Normal leallitas\n"
ita "%s: Shutdown normale\n"
+ jpn "%s: 通常シャットダウン\n"
kor "%s: 정상적인 shutdown\n"
nor "%s: Normal avslutning\n"
norwegian-ny "%s: Normal nedkopling\n"
@@ -1871,18 +1824,17 @@ ER_NORMAL_SHUTDOWN
swe "%s: Normal avslutning\n"
ukr "%s: Нормальне завершення\n"
ER_GOT_SIGNAL
- cze "%s: p-Břijat signal %d, končím\n"
+ cze "%s: přijat signal %d, končím\n"
dan "%s: Fangede signal %d. Afslutter!!\n"
nla "%s: Signaal %d. Systeem breekt af!\n"
eng "%s: Got signal %d. Aborting!\n"
- jps "%s: Got signal %d. 中断!¥n",
est "%s: sain signaali %d. Lõpetan!\n"
fre "%s: Reçu le signal %d. Abandonne!\n"
ger "%s: Signal %d erhalten. Abbruch!\n"
greek "%s: Ελήφθη το μήνυμα %d. Η διαδικασία εγκαταλείπεται!\n"
hun "%s: %d jelzes. Megszakitva!\n"
ita "%s: Ricevuto segnale %d. Interruzione!\n"
- jpn "%s: Got signal %d. 中断!\n"
+ jpn "%s: シグナル %d を受信しました。強制終了します!\n"
kor "%s: %d 신호가 들어왔음. 중지!\n"
nor "%s: Oppdaget signal %d. Avslutter!\n"
norwegian-ny "%s: Oppdaga signal %d. Avsluttar!\n"
@@ -1896,18 +1848,17 @@ ER_GOT_SIGNAL
swe "%s: Fick signal %d. Avslutar!\n"
ukr "%s: Отримано сигнал %d. Перериваюсь!\n"
ER_SHUTDOWN_COMPLETE
- cze "%s: ukon-Bčení práce hotovo\n"
+ cze "%s: ukončení práce hotovo\n"
dan "%s: Server lukket\n"
nla "%s: Afsluiten afgerond\n"
eng "%s: Shutdown complete\n"
- jps "%s: Shutdown 完了¥n",
est "%s: Lõpp\n"
fre "%s: Arrêt du serveur terminé\n"
ger "%s: Herunterfahren beendet\n"
greek "%s: Η διαδικασία Shutdown ολοκληρώθηκε\n"
hun "%s: A leallitas kesz\n"
ita "%s: Shutdown completato\n"
- jpn "%s: Shutdown 完了\n"
+ jpn "%s: シャットダウン完了\n"
kor "%s: Shutdown 이 완료됨!\n"
nor "%s: Avslutning komplett\n"
norwegian-ny "%s: Nedkopling komplett\n"
@@ -1921,18 +1872,17 @@ ER_SHUTDOWN_COMPLETE
swe "%s: Avslutning klar\n"
ukr "%s: Роботу завершено\n"
ER_FORCING_CLOSE 08S01
- cze "%s: n-Básilné uzavření threadu %ld uživatele '%-.48s'\n"
+ cze "%s: násilné uzavření threadu %ld uživatele '%-.48s'\n"
dan "%s: Forceret nedlukning af tråd: %ld bruger: '%-.48s'\n"
nla "%s: Afsluiten afgedwongen van thread %ld gebruiker: '%-.48s'\n"
eng "%s: Forcing close of thread %ld user: '%-.48s'\n"
- jps "%s: スレッド %ld 強制終了 user: '%-.48s'¥n",
est "%s: Sulgen jõuga lõime %ld kasutaja: '%-.48s'\n"
fre "%s: Arrêt forcé de la tâche (thread) %ld utilisateur: '%-.48s'\n"
ger "%s: Thread %ld zwangsweise beendet. Benutzer: '%-.48s'\n"
greek "%s: Το thread θα κλείσει %ld user: '%-.48s'\n"
hun "%s: A(z) %ld thread kenyszeritett zarasa. Felhasznalo: '%-.48s'\n"
ita "%s: Forzata la chiusura del thread %ld utente: '%-.48s'\n"
- jpn "%s: スレッド %ld 強制終了 user: '%-.48s'\n"
+ jpn "%s: スレッド %ld を強制終了します (ユーザー: '%-.48s')\n"
kor "%s: thread %ld의 강제 종료 user: '%-.48s'\n"
nor "%s: Påtvinget avslutning av tråd %ld bruker: '%-.48s'\n"
norwegian-ny "%s: Påtvinga avslutning av tråd %ld brukar: '%-.48s'\n"
@@ -1946,18 +1896,17 @@ ER_FORCING_CLOSE 08S01
swe "%s: Stänger av tråd %ld; användare: '%-.48s'\n"
ukr "%s: Прискорюю закриття гілки %ld користувача: '%-.48s'\n"
ER_IPSOCK_ERROR 08S01
- cze "Nemohu vytvo-Břit IP socket"
+ cze "Nemohu vytvořit IP socket"
dan "Kan ikke oprette IP socket"
nla "Kan IP-socket niet openen"
eng "Can't create IP socket"
- jps "IP socket が作れません",
est "Ei suuda luua IP socketit"
fre "Ne peut créer la connexion IP (socket)"
ger "Kann IP-Socket nicht erzeugen"
greek "Δεν είναι δυνατή η δημιουργία IP socket"
hun "Az IP socket nem hozhato letre"
ita "Impossibile creare il socket IP"
- jpn "IP socket が作れません"
+ jpn "IPソケットを作成できません。"
kor "IP 소켓을 만들지 못했습니다."
nor "Kan ikke opprette IP socket"
norwegian-ny "Kan ikkje opprette IP socket"
@@ -1971,18 +1920,17 @@ ER_IPSOCK_ERROR 08S01
swe "Kan inte skapa IP-socket"
ukr "Не можу створити IP роз'єм"
ER_NO_SUCH_INDEX 42S12 S1009
- cze "Tabulka '%-.192s' nem-Bá index odpovídající CREATE INDEX. Vytvořte tabulku znovu"
+ cze "Tabulka '%-.192s' nemá index odpovídající CREATE INDEX. Vytvořte tabulku znovu"
dan "Tabellen '%-.192s' har ikke den nøgle, som blev brugt i CREATE INDEX. Genopret tabellen"
nla "Tabel '%-.192s' heeft geen INDEX zoals deze gemaakt worden met CREATE INDEX. Maak de tabel opnieuw"
eng "Table '%-.192s' has no index like the one used in CREATE INDEX; recreate the table"
- jps "Table '%-.192s' はそのような index を持っていません(CREATE INDEX 実行時に指定されていません). テーブルを作り直してください",
est "Tabelil '%-.192s' puuduvad võtmed. Loo tabel uuesti"
fre "La table '%-.192s' n'a pas d'index comme celle utilisée dans CREATE INDEX. Recréez la table"
ger "Tabelle '%-.192s' besitzt keinen wie den in CREATE INDEX verwendeten Index. Tabelle neu anlegen"
greek "Ο πίνακας '%-.192s' δεν έχει ευρετήριο (index) σαν αυτό που χρησιμοποιείτε στην CREATE INDEX. Παρακαλώ, ξαναδημιουργήστε τον πίνακα"
hun "A(z) '%-.192s' tablahoz nincs meg a CREATE INDEX altal hasznalt index. Alakitsa at a tablat"
ita "La tabella '%-.192s' non ha nessun indice come quello specificatato dalla CREATE INDEX. Ricrea la tabella"
- jpn "Table '%-.192s' はそのような index を持っていません(CREATE INDEX 実行時に指定されていません). テーブルを作り直してください"
+ jpn "表 '%-.192s' に以前CREATE INDEXで作成された索引がありません。表を作り直してください。"
kor "테이블 '%-.192s'는 인덱스를 만들지 않았습니다. alter 테이블명령을 이용하여 테이블을 수정하세요..."
nor "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Gjenopprett tabellen"
norwegian-ny "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Oprett tabellen på nytt"
@@ -1996,7 +1944,7 @@ ER_NO_SUCH_INDEX 42S12 S1009
swe "Tabellen '%-.192s' har inget index som motsvarar det angivna i CREATE INDEX. Skapa om tabellen"
ukr "Таблиця '%-.192s' має індекс, що не співпадає з вказанним у CREATE INDEX. Створіть таблицю знову"
ER_WRONG_FIELD_TERMINATORS 42000 S1009
- cze "Argument separ-Bátoru položek nebyl očekáván. Přečtěte si manuál"
+ cze "Argument separátoru položek nebyl očekáván. Přečtěte si manuál"
dan "Felt adskiller er ikke som forventet, se dokumentationen"
nla "De argumenten om velden te scheiden zijn anders dan verwacht. Raadpleeg de handleiding"
eng "Field separator argument is not what is expected; check the manual"
@@ -2006,6 +1954,7 @@ ER_WRONG_FIELD_TERMINATORS 42000 S1009
greek "Ο διαχωριστής πεδίων δεν είναι αυτός που αναμενόταν. Παρακαλώ ανατρέξτε στο manual"
hun "A mezoelvalaszto argumentumok nem egyeznek meg a varttal. Nezze meg a kezikonyvben!"
ita "L'argomento 'Field separator' non e` quello atteso. Controlla il manuale"
+ jpn "フィールド区切り文字が予期せぬ使われ方をしています。マニュアルを確認して下さい。"
kor "필드 구분자 인수들이 완전하지 않습니다. 메뉴얼을 찾아 보세요."
nor "Felt skiller argumentene er ikke som forventet, se dokumentasjonen"
norwegian-ny "Felt skiljer argumenta er ikkje som venta, sjå dokumentasjonen"
@@ -2019,7 +1968,7 @@ ER_WRONG_FIELD_TERMINATORS 42000 S1009
swe "Fältseparatorerna är vad som förväntades. Kontrollera mot manualen"
ukr "Хибний розділювач полів. Почитайте документацію"
ER_BLOBS_AND_NO_TERMINATED 42000 S1009
- cze "Nen-Bí možné použít pevný rowlength s BLOBem. Použijte 'fields terminated by'."
+ cze "Není možné použít pevný rowlength s BLOBem. Použijte 'fields terminated by'."
dan "Man kan ikke bruge faste feltlængder med BLOB. Brug i stedet 'fields terminated by'."
nla "Bij het gebruik van BLOBs is het niet mogelijk om vaste rijlengte te gebruiken. Maak s.v.p. gebruik van 'fields terminated by'."
eng "You can't use fixed rowlength with BLOBs; please use 'fields terminated by'"
@@ -2029,7 +1978,7 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
greek "Δεν μπορείτε να χρησιμοποιήσετε fixed rowlength σε BLOBs. Παρακαλώ χρησιμοποιείστε 'fields terminated by'."
hun "Fix hosszusagu BLOB-ok nem hasznalhatok. Hasznalja a 'mezoelvalaszto jelet' ."
ita "Non possono essere usate righe a lunghezza fissa con i BLOB. Usa 'FIELDS TERMINATED BY'."
- jpn "You can't use fixed rowlength with BLOBs; please use 'fields terminated by'."
+ jpn "BLOBには固定長レコードが使用できません。'FIELDS TERMINATED BY'句を使用して下さい。"
kor "BLOB로는 고정길이의 lowlength를 사용할 수 없습니다. 'fields terminated by'를 사용하세요."
nor "En kan ikke bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'."
norwegian-ny "Ein kan ikkje bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'."
@@ -2043,18 +1992,17 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
swe "Man kan inte använda fast radlängd med blobs. Använd 'fields terminated by'"
ukr "Не можна використовувати сталу довжину строки з BLOB. Зкористайтеся 'fields terminated by'"
ER_TEXTFILE_NOT_READABLE
- cze "Soubor '%-.128s' mus-Bí být v adresáři databáze nebo čitelný pro všechny"
+ cze "Soubor '%-.128s' musí být v adresáři databáze nebo čitelný pro všechny"
dan "Filen '%-.128s' skal være i database-folderen, eller kunne læses af alle"
nla "Het bestand '%-.128s' dient in de database directory voor the komen of leesbaar voor iedereen te zijn."
eng "The file '%-.128s' must be in the database directory or be readable by all"
- jps "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません.",
est "Fail '%-.128s' peab asuma andmebaasi kataloogis või olema kõigile loetav"
fre "Le fichier '%-.128s' doit être dans le répertoire de la base et lisible par tous"
ger "Datei '%-.128s' muss im Datenbank-Verzeichnis vorhanden oder lesbar für alle sein"
greek "Το αρχείο '%-.128s' πρέπει να υπάρχει στο database directory ή να μπορεί να διαβαστεί από όλους"
hun "A(z) '%-.128s'-nak az adatbazis konyvtarban kell lennie, vagy mindenki szamara olvashatonak"
ita "Il file '%-.128s' deve essere nella directory del database e deve essere leggibile da tutti"
- jpn "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません."
+ jpn "ファイル '%-.128s' はデータベースディレクトリにあるか、全てのユーザーから読める必要があります。"
kor "'%-.128s' 화일는 데이타베이스 디렉토리에 존재하거나 모두에게 읽기 가능하여야 합니다."
nor "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
norwegian-ny "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
@@ -2068,18 +2016,17 @@ ER_TEXTFILE_NOT_READABLE
swe "Textfilen '%-.128s' måste finnas i databasbiblioteket eller vara läsbar för alla"
ukr "Файл '%-.128s' повинен бути у теці бази данних або мати встановлене право на читання для усіх"
ER_FILE_EXISTS_ERROR
- cze "Soubor '%-.200s' ji-Bž existuje"
+ cze "Soubor '%-.200s' již existuje"
dan "Filen '%-.200s' eksisterer allerede"
nla "Het bestand '%-.200s' bestaat reeds"
eng "File '%-.200s' already exists"
- jps "File '%-.200s' は既に存在します",
est "Fail '%-.200s' juba eksisteerib"
fre "Le fichier '%-.200s' existe déjà"
ger "Datei '%-.200s' bereits vorhanden"
greek "Το αρχείο '%-.200s' υπάρχει ήδη"
hun "A '%-.200s' file mar letezik."
ita "Il file '%-.200s' esiste gia`"
- jpn "File '%-.200s' は既に存在します"
+ jpn "ファイル '%-.200s' はすでに存在します。"
kor "'%-.200s' 화일은 이미 존재합니다."
nor "Filen '%-.200s' eksisterte allerede"
norwegian-ny "Filen '%-.200s' eksisterte allereide"
@@ -2093,18 +2040,17 @@ ER_FILE_EXISTS_ERROR
swe "Filen '%-.200s' existerar redan"
ukr "Файл '%-.200s' вже існує"
ER_LOAD_INFO
- cze "Z-Báznamů: %ld Vymazáno: %ld Přeskočeno: %ld Varování: %ld"
+ cze "Záznamů: %ld Vymazáno: %ld Přeskočeno: %ld Varování: %ld"
dan "Poster: %ld Fjernet: %ld Sprunget over: %ld Advarsler: %ld"
nla "Records: %ld Verwijderd: %ld Overgeslagen: %ld Waarschuwingen: %ld"
eng "Records: %ld Deleted: %ld Skipped: %ld Warnings: %ld"
- jps "レコード数: %ld 削除: %ld Skipped: %ld Warnings: %ld",
est "Kirjeid: %ld Kustutatud: %ld Vahele jäetud: %ld Hoiatusi: %ld"
fre "Enregistrements: %ld Effacés: %ld Non traités: %ld Avertissements: %ld"
ger "Datensätze: %ld Gelöscht: %ld Ausgelassen: %ld Warnungen: %ld"
greek "Εγγραφές: %ld Διαγραφές: %ld Παρεκάμφθησαν: %ld Προειδοποιήσεις: %ld"
hun "Rekordok: %ld Torolve: %ld Skipped: %ld Warnings: %ld"
ita "Records: %ld Cancellati: %ld Saltati: %ld Avvertimenti: %ld"
- jpn "レコード数: %ld 削除: %ld Skipped: %ld Warnings: %ld"
+ jpn "レコード数: %ld 削除: %ld スキップ: %ld 警告: %ld"
kor "레코드: %ld개 삭제: %ld개 스킵: %ld개 경고: %ld개"
nor "Poster: %ld Fjernet: %ld Hoppet over: %ld Advarsler: %ld"
norwegian-ny "Poster: %ld Fjerna: %ld Hoppa over: %ld Åtvaringar: %ld"
@@ -2118,11 +2064,10 @@ ER_LOAD_INFO
swe "Rader: %ld Bortagna: %ld Dubletter: %ld Varningar: %ld"
ukr "Записів: %ld Видалено: %ld Пропущено: %ld Застережень: %ld"
ER_ALTER_INFO
- cze "Z-Báznamů: %ld Zdvojených: %ld"
+ cze "Záznamů: %ld Zdvojených: %ld"
dan "Poster: %ld Ens: %ld"
nla "Records: %ld Dubbel: %ld"
eng "Records: %ld Duplicates: %ld"
- jps "レコード数: %ld 重複: %ld",
est "Kirjeid: %ld Kattuvaid: %ld"
fre "Enregistrements: %ld Doublons: %ld"
ger "Datensätze: %ld Duplikate: %ld"
@@ -2143,7 +2088,7 @@ ER_ALTER_INFO
swe "Rader: %ld Dubletter: %ld"
ukr "Записів: %ld Дублікатів: %ld"
ER_WRONG_SUB_KEY
- cze "Chybn-Bá podčást klíče -- není to řetězec nebo je delší než délka části klíče"
+ cze "Chybná podčást klíče -- není to řetězec nebo je delší než délka části klíče"
dan "Forkert indeksdel. Den anvendte nøgledel er ikke en streng eller længden er større end nøglelængden"
nla "Foutief sub-gedeelte van de zoeksleutel. De gebruikte zoeksleutel is geen onderdeel van een string of of de gebruikte lengte is langer dan de zoeksleutel"
eng "Incorrect prefix key; the used key part isn't a string, the used length is longer than the key part, or the storage engine doesn't support unique prefix keys"
@@ -2153,7 +2098,7 @@ ER_WRONG_SUB_KEY
greek "Εσφαλμένο sub part key. Το χρησιμοποιούμενο key part δεν είναι string ή το μήκος του είναι μεγαλύτερο"
hun "Rossz alkulcs. A hasznalt kulcsresz nem karaktersorozat vagy hosszabb, mint a kulcsresz"
ita "Sotto-parte della chiave errata. La parte di chiave utilizzata non e` una stringa o la lunghezza e` maggiore della parte di chiave."
- jpn "Incorrect prefix key; the used key part isn't a string or the used length is longer than the key part"
+ jpn "キーのプレフィックスが不正です。キーが文字列ではないか、プレフィックス長がキーよりも長いか、ストレージエンジンが一意索引のプレフィックス指定をサポートしていません。"
kor "부정확한 서버 파트 키. 사용된 키 파트가 스트링이 아니거나 키 파트의 길이가 너무 깁니다."
nor "Feil delnøkkel. Den brukte delnøkkelen er ikke en streng eller den oppgitte lengde er lengre enn nøkkel lengden"
norwegian-ny "Feil delnykkel. Den brukte delnykkelen er ikkje ein streng eller den oppgitte lengda er lengre enn nykkellengden"
@@ -2167,18 +2112,17 @@ ER_WRONG_SUB_KEY
swe "Felaktig delnyckel. Nyckeldelen är inte en sträng eller den angivna längden är längre än kolumnlängden"
ukr "Невірна частина ключа. Використана частина ключа не є строкою, задовга або вказівник таблиці не підтримує унікальних частин ключей"
ER_CANT_REMOVE_ALL_FIELDS 42000
- cze "Nen-Bí možné vymazat všechny položky s ALTER TABLE. Použijte DROP TABLE"
+ cze "Není možné vymazat všechny položky s ALTER TABLE. Použijte DROP TABLE"
dan "Man kan ikke slette alle felter med ALTER TABLE. Brug DROP TABLE i stedet."
nla "Het is niet mogelijk alle velden te verwijderen met ALTER TABLE. Gebruik a.u.b. DROP TABLE hiervoor!"
eng "You can't delete all columns with ALTER TABLE; use DROP TABLE instead"
- jps "ALTER TABLE で全ての column は削除できません. DROP TABLE を使用してください",
est "ALTER TABLE kasutades ei saa kustutada kõiki tulpasid. Kustuta tabel DROP TABLE abil"
fre "Vous ne pouvez effacer tous les champs avec ALTER TABLE. Utilisez DROP TABLE"
ger "Mit ALTER TABLE können nicht alle Felder auf einmal gelöscht werden. Dafür DROP TABLE verwenden"
greek "Δεν είναι δυνατή η διαγραφή όλων των πεδίων με ALTER TABLE. Παρακαλώ χρησιμοποιείστε DROP TABLE"
hun "Az osszes mezo nem torolheto az ALTER TABLE-lel. Hasznalja a DROP TABLE-t helyette"
ita "Non si possono cancellare tutti i campi con una ALTER TABLE. Utilizzare DROP TABLE"
- jpn "ALTER TABLE で全ての column は削除できません. DROP TABLE を使用してください"
+ jpn "ALTER TABLE では全ての列の削除はできません。DROP TABLE を使用してください。"
kor "ALTER TABLE 명령으로는 모든 칼럼을 지울 수 없습니다. DROP TABLE 명령을 이용하세요."
nor "En kan ikke slette alle felt med ALTER TABLE. Bruk DROP TABLE isteden."
norwegian-ny "Ein kan ikkje slette alle felt med ALTER TABLE. Bruk DROP TABLE istadenfor."
@@ -2192,18 +2136,17 @@ ER_CANT_REMOVE_ALL_FIELDS 42000
swe "Man kan inte radera alla fält med ALTER TABLE. Använd DROP TABLE istället"
ukr "Не можливо видалити всі стовбці за допомогою ALTER TABLE. Для цього скористайтеся DROP TABLE"
ER_CANT_DROP_FIELD_OR_KEY 42000
- cze "Nemohu zru-Bšit '%-.192s' (provést DROP). Zkontrolujte, zda neexistují záznamy/klíče"
+ cze "Nemohu zrušit '%-.192s' (provést DROP). Zkontrolujte, zda neexistují záznamy/klíče"
dan "Kan ikke udføre DROP '%-.192s'. Undersøg om feltet/nøglen eksisterer."
nla "Kan '%-.192s' niet weggooien. Controleer of het veld of de zoeksleutel daadwerkelijk bestaat."
eng "Can't DROP '%-.192s'; check that column/key exists"
- jps "'%-.192s' を破棄できませんでした; check that column/key exists",
est "Ei suuda kustutada '%-.192s'. Kontrolli kas tulp/võti eksisteerib"
fre "Ne peut effacer (DROP) '%-.192s'. Vérifiez s'il existe"
ger "Kann '%-.192s' nicht löschen. Existiert die Spalte oder der Schlüssel?"
greek "Αδύνατη η διαγραφή (DROP) '%-.192s'. Παρακαλώ ελέγξτε αν το πεδίο/κλειδί υπάρχει"
hun "A DROP '%-.192s' nem lehetseges. Ellenorizze, hogy a mezo/kulcs letezik-e"
ita "Impossibile cancellare '%-.192s'. Controllare che il campo chiave esista"
- jpn "'%-.192s' を破棄できませんでした; check that column/key exists"
+ jpn "'%-.192s' を削除できません。列/索引の存在を確認して下さい。"
kor "'%-.192s'를 DROP할 수 없습니다. 칼럼이나 키가 존재하는지 채크하세요."
nor "Kan ikke DROP '%-.192s'. Undersøk om felt/nøkkel eksisterer."
norwegian-ny "Kan ikkje DROP '%-.192s'. Undersøk om felt/nøkkel eksisterar."
@@ -2217,18 +2160,17 @@ ER_CANT_DROP_FIELD_OR_KEY 42000
swe "Kan inte ta bort '%-.192s'. Kontrollera att fältet/nyckel finns"
ukr "Не можу DROP '%-.192s'. Перевірте, чи цей стовбець/ключ існує"
ER_INSERT_INFO
- cze "Z-Báznamů: %ld Zdvojených: %ld Varování: %ld"
+ cze "Záznamů: %ld Zdvojených: %ld Varování: %ld"
dan "Poster: %ld Ens: %ld Advarsler: %ld"
nla "Records: %ld Dubbel: %ld Waarschuwing: %ld"
eng "Records: %ld Duplicates: %ld Warnings: %ld"
- jps "レコード数: %ld 重複数: %ld Warnings: %ld",
est "Kirjeid: %ld Kattuvaid: %ld Hoiatusi: %ld"
fre "Enregistrements: %ld Doublons: %ld Avertissements: %ld"
ger "Datensätze: %ld Duplikate: %ld Warnungen: %ld"
greek "Εγγραφές: %ld Επαναλήψεις: %ld Προειδοποιήσεις: %ld"
hun "Rekordok: %ld Duplikalva: %ld Warnings: %ld"
ita "Records: %ld Duplicati: %ld Avvertimenti: %ld"
- jpn "レコード数: %ld 重複数: %ld Warnings: %ld"
+ jpn "レコード数: %ld 重複数: %ld 警告: %ld"
kor "레코드: %ld개 중복: %ld개 경고: %ld개"
nor "Poster: %ld Like: %ld Advarsler: %ld"
norwegian-ny "Postar: %ld Like: %ld Åtvaringar: %ld"
@@ -2244,22 +2186,22 @@ ER_INSERT_INFO
ER_UPDATE_TABLE_USED
eng "You can't specify target table '%-.192s' for update in FROM clause"
ger "Die Verwendung der zu aktualisierenden Zieltabelle '%-.192s' ist in der FROM-Klausel nicht zulässig."
+ jpn "FROM句にある表 '%-.192s' はUPDATEの対象にできません。"
rus "Не допускается указание таблицы '%-.192s' в списке таблиц FROM для внесения в нее изменений"
swe "INSERT-table '%-.192s' får inte finnas i FROM tabell-listan"
ukr "Таблиця '%-.192s' що змінюється не дозволена у переліку таблиць FROM"
ER_NO_SUCH_THREAD
- cze "Nezn-Bámá identifikace threadu: %lu"
+ cze "Neznámá identifikace threadu: %lu"
dan "Ukendt tråd id: %lu"
nla "Onbekend thread id: %lu"
eng "Unknown thread id: %lu"
- jps "thread id: %lu はありません",
est "Tundmatu lõim: %lu"
fre "Numéro de tâche inconnu: %lu"
ger "Unbekannte Thread-ID: %lu"
greek "Αγνωστο thread id: %lu"
hun "Ervenytelen szal (thread) id: %lu"
ita "Thread id: %lu sconosciuto"
- jpn "thread id: %lu はありません"
+ jpn "不明なスレッドIDです: %lu"
kor "알수 없는 쓰레드 id: %lu"
nor "Ukjent tråd id: %lu"
norwegian-ny "Ukjent tråd id: %lu"
@@ -2273,18 +2215,17 @@ ER_NO_SUCH_THREAD
swe "Finns ingen tråd med id %lu"
ukr "Невідомий ідентифікатор гілки: %lu"
ER_KILL_DENIED_ERROR
- cze "Nejste vlastn-Bíkem threadu %lu"
+ cze "Nejste vlastníkem threadu %lu"
dan "Du er ikke ejer af tråden %lu"
nla "U bent geen bezitter van thread %lu"
eng "You are not owner of thread %lu"
- jps "thread %lu のオーナーではありません",
est "Ei ole lõime %lu omanik"
fre "Vous n'êtes pas propriétaire de la tâche no: %lu"
ger "Sie sind nicht Eigentümer von Thread %lu"
greek "Δεν είσθε owner του thread %lu"
hun "A %lu thread-nek mas a tulajdonosa"
ita "Utente non proprietario del thread %lu"
- jpn "thread %lu のオーナーではありません"
+ jpn "スレッド %lu のオーナーではありません。"
kor "쓰레드(Thread) %lu의 소유자가 아닙니다."
nor "Du er ikke eier av tråden %lu"
norwegian-ny "Du er ikkje eigar av tråd %lu"
@@ -2298,7 +2239,7 @@ ER_KILL_DENIED_ERROR
swe "Du är inte ägare till tråd %lu"
ukr "Ви не володар гілки %lu"
ER_NO_TABLES_USED
- cze "Nejsou pou-Bžity žádné tabulky"
+ cze "Nejsou použity žádné tabulky"
dan "Ingen tabeller i brug"
nla "Geen tabellen gebruikt."
eng "No tables used"
@@ -2308,6 +2249,7 @@ ER_NO_TABLES_USED
greek "Δεν χρησιμοποιήθηκαν πίνακες"
hun "Nincs hasznalt tabla"
ita "Nessuna tabella usata"
+ jpn "表が指定されていません。"
kor "어떤 테이블도 사용되지 않았습니다."
nor "Ingen tabeller i bruk"
norwegian-ny "Ingen tabellar i bruk"
@@ -2321,7 +2263,7 @@ ER_NO_TABLES_USED
swe "Inga tabeller angivna"
ukr "Не використано таблиць"
ER_TOO_BIG_SET
- cze "P-Bříliš mnoho řetězců pro sloupec %-.192s a SET"
+ cze "Příliš mnoho řetězců pro sloupec %-.192s a SET"
dan "For mange tekststrenge til specifikationen af SET i kolonne %-.192s"
nla "Teveel strings voor kolom %-.192s en SET"
eng "Too many strings for column %-.192s and SET"
@@ -2331,6 +2273,7 @@ ER_TOO_BIG_SET
greek "Πάρα πολλά strings για το πεδίο %-.192s και SET"
hun "Tul sok karakter: %-.192s es SET"
ita "Troppe stringhe per la colonna %-.192s e la SET"
+ jpn "SET型の列 '%-.192s' のメンバーの数が多すぎます。"
kor "칼럼 %-.192s와 SET에서 스트링이 너무 많습니다."
nor "For mange tekststrenger kolonne %-.192s og SET"
norwegian-ny "For mange tekststrengar felt %-.192s og SET"
@@ -2344,7 +2287,7 @@ ER_TOO_BIG_SET
swe "För många alternativ till kolumn %-.192s för SET"
ukr "Забагато строк для стовбця %-.192s та SET"
ER_NO_UNIQUE_LOGFILE
- cze "Nemohu vytvo-Břit jednoznačné jméno logovacího souboru %-.200s.(1-999)\n"
+ cze "Nemohu vytvořit jednoznačné jméno logovacího souboru %-.200s.(1-999)\n"
dan "Kan ikke lave unikt log-filnavn %-.200s.(1-999)\n"
nla "Het is niet mogelijk een unieke naam te maken voor de logfile %-.200s.(1-999)\n"
eng "Can't generate a unique log-filename %-.200s.(1-999)\n"
@@ -2354,6 +2297,7 @@ ER_NO_UNIQUE_LOGFILE
greek "Αδύνατη η δημιουργία unique log-filename %-.200s.(1-999)\n"
hun "Egyedi log-filenev nem generalhato: %-.200s.(1-999)\n"
ita "Impossibile generare un nome del file log unico %-.200s.(1-999)\n"
+ jpn "一意なログファイル名 %-.200s.(1-999) を生成できません。\n"
kor "Unique 로그화일 '%-.200s'를 만들수 없습니다.(1-999)\n"
nor "Kan ikke lage unikt loggfilnavn %-.200s.(1-999)\n"
norwegian-ny "Kan ikkje lage unikt loggfilnavn %-.200s.(1-999)\n"
@@ -2367,18 +2311,17 @@ ER_NO_UNIQUE_LOGFILE
swe "Kan inte generera ett unikt filnamn %-.200s.(1-999)\n"
ukr "Не можу згенерувати унікальне ім'я log-файлу %-.200s.(1-999)\n"
ER_TABLE_NOT_LOCKED_FOR_WRITE
- cze "Tabulka '%-.192s' byla zam-Bčena s READ a nemůže být změněna"
+ cze "Tabulka '%-.192s' byla zamčena s READ a nemůže být změněna"
dan "Tabellen '%-.192s' var låst med READ lås og kan ikke opdateres"
nla "Tabel '%-.192s' was gelocked met een lock om te lezen. Derhalve kunnen geen wijzigingen worden opgeslagen."
eng "Table '%-.192s' was locked with a READ lock and can't be updated"
- jps "Table '%-.192s' は READ lock になっていて、更新はできません",
est "Tabel '%-.192s' on lukustatud READ lukuga ning ei ole muudetav"
fre "Table '%-.192s' verrouillée lecture (READ): modification impossible"
ger "Tabelle '%-.192s' ist mit Lesesperre versehen und kann nicht aktualisiert werden"
greek "Ο πίνακας '%-.192s' έχει κλειδωθεί με READ lock και δεν επιτρέπονται αλλαγές"
hun "A(z) '%-.192s' tabla zarolva lett (READ lock) es nem lehet frissiteni"
ita "La tabella '%-.192s' e` soggetta a lock in lettura e non puo` essere aggiornata"
- jpn "Table '%-.192s' は READ lock になっていて、更新はできません"
+ jpn "表 '%-.192s' はREADロックされていて、更新できません。"
kor "테이블 '%-.192s'는 READ 락이 잠겨있어서 갱신할 수 없습니다."
nor "Tabellen '%-.192s' var låst med READ lås og kan ikke oppdateres"
norwegian-ny "Tabellen '%-.192s' var låst med READ lås og kan ikkje oppdaterast"
@@ -2392,18 +2335,17 @@ ER_TABLE_NOT_LOCKED_FOR_WRITE
swe "Tabell '%-.192s' kan inte uppdateras emedan den är låst för läsning"
ukr "Таблицю '%-.192s' заблоковано тільки для читання, тому її не можна оновити"
ER_TABLE_NOT_LOCKED
- cze "Tabulka '%-.192s' nebyla zam-Bčena s LOCK TABLES"
+ cze "Tabulka '%-.192s' nebyla zamčena s LOCK TABLES"
dan "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
nla "Tabel '%-.192s' was niet gelocked met LOCK TABLES"
eng "Table '%-.192s' was not locked with LOCK TABLES"
- jps "Table '%-.192s' は LOCK TABLES によってロックされていません",
est "Tabel '%-.192s' ei ole lukustatud käsuga LOCK TABLES"
fre "Table '%-.192s' non verrouillée: utilisez LOCK TABLES"
ger "Tabelle '%-.192s' wurde nicht mit LOCK TABLES gesperrt"
greek "Ο πίνακας '%-.192s' δεν έχει κλειδωθεί με LOCK TABLES"
hun "A(z) '%-.192s' tabla nincs zarolva a LOCK TABLES-szel"
ita "Non e` stato impostato il lock per la tabella '%-.192s' con LOCK TABLES"
- jpn "Table '%-.192s' は LOCK TABLES によってロックされていません"
+ jpn "表 '%-.192s' は LOCK TABLES でロックされていません。"
kor "테이블 '%-.192s'는 LOCK TABLES 명령으로 잠기지 않았습니다."
nor "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
norwegian-ny "Tabellen '%-.192s' var ikkje låst med LOCK TABLES"
@@ -2417,7 +2359,7 @@ ER_TABLE_NOT_LOCKED
swe "Tabell '%-.192s' är inte låst med LOCK TABLES"
ukr "Таблицю '%-.192s' не було блоковано з LOCK TABLES"
ER_BLOB_CANT_HAVE_DEFAULT 42000
- cze "Blob polo-Bžka '%-.192s' nemůže mít defaultní hodnotu"
+ cze "Blob položka '%-.192s' nemůže mít defaultní hodnotu"
dan "BLOB feltet '%-.192s' kan ikke have en standard værdi"
nla "Blob veld '%-.192s' can geen standaardwaarde bevatten"
eng "BLOB/TEXT column '%-.192s' can't have a default value"
@@ -2427,7 +2369,7 @@ ER_BLOB_CANT_HAVE_DEFAULT 42000
greek "Τα Blob πεδία '%-.192s' δεν μπορούν να έχουν προκαθορισμένες τιμές (default value)"
hun "A(z) '%-.192s' blob objektumnak nem lehet alapertelmezett erteke"
ita "Il campo BLOB '%-.192s' non puo` avere un valore di default"
- jpn "BLOB column '%-.192s' can't have a default value"
+ jpn "BLOB/TEXT 列 '%-.192s' にはデフォルト値を指定できません。"
kor "BLOB 칼럼 '%-.192s' 는 디폴트 값을 가질 수 없습니다."
nor "Blob feltet '%-.192s' kan ikke ha en standard verdi"
norwegian-ny "Blob feltet '%-.192s' kan ikkje ha ein standard verdi"
@@ -2441,18 +2383,17 @@ ER_BLOB_CANT_HAVE_DEFAULT 42000
swe "BLOB fält '%-.192s' kan inte ha ett DEFAULT-värde"
ukr "Стовбець BLOB '%-.192s' не може мати значення по замовчуванню"
ER_WRONG_DB_NAME 42000
- cze "Nep-Břípustné jméno databáze '%-.100s'"
+ cze "Nepřípustné jméno databáze '%-.100s'"
dan "Ugyldigt database navn '%-.100s'"
nla "Databasenaam '%-.100s' is niet getoegestaan"
eng "Incorrect database name '%-.100s'"
- jps "指定した database 名 '%-.100s' が間違っています",
est "Vigane andmebaasi nimi '%-.100s'"
fre "Nom de base de donnée illégal: '%-.100s'"
ger "Unerlaubter Datenbankname '%-.100s'"
greek "Λάθος όνομα βάσης δεδομένων '%-.100s'"
hun "Hibas adatbazisnev: '%-.100s'"
ita "Nome database errato '%-.100s'"
- jpn "指定した database 名 '%-.100s' が間違っています"
+ jpn "データベース名 '%-.100s' は不正です。"
kor "'%-.100s' 데이타베이스의 이름이 부정확합니다."
nor "Ugyldig database navn '%-.100s'"
norwegian-ny "Ugyldig database namn '%-.100s'"
@@ -2466,18 +2407,17 @@ ER_WRONG_DB_NAME 42000
swe "Felaktigt databasnamn '%-.100s'"
ukr "Невірне ім'я бази данних '%-.100s'"
ER_WRONG_TABLE_NAME 42000
- cze "Nep-Břípustné jméno tabulky '%-.100s'"
+ cze "Nepřípustné jméno tabulky '%-.100s'"
dan "Ugyldigt tabel navn '%-.100s'"
nla "Niet toegestane tabelnaam '%-.100s'"
eng "Incorrect table name '%-.100s'"
- jps "指定した table 名 '%-.100s' はまちがっています",
est "Vigane tabeli nimi '%-.100s'"
fre "Nom de table illégal: '%-.100s'"
ger "Unerlaubter Tabellenname '%-.100s'"
greek "Λάθος όνομα πίνακα '%-.100s'"
hun "Hibas tablanev: '%-.100s'"
ita "Nome tabella errato '%-.100s'"
- jpn "指定した table 名 '%-.100s' はまちがっています"
+ jpn "表名 '%-.100s' は不正です。"
kor "'%-.100s' 테이블 이름이 부정확합니다."
nor "Ugyldig tabell navn '%-.100s'"
norwegian-ny "Ugyldig tabell namn '%-.100s'"
@@ -2491,7 +2431,7 @@ ER_WRONG_TABLE_NAME 42000
swe "Felaktigt tabellnamn '%-.100s'"
ukr "Невірне ім'я таблиці '%-.100s'"
ER_TOO_BIG_SELECT 42000
- cze "Zadan-Bý SELECT by procházel příliš mnoho záznamů a trval velmi dlouho. Zkontrolujte tvar WHERE a je-li SELECT v pořádku, použijte SET SQL_BIG_SELECTS=1"
+ cze "Zadaný SELECT by procházel příliš mnoho záznamů a trval velmi dlouho. Zkontrolujte tvar WHERE a je-li SELECT v pořádku, použijte SET SQL_BIG_SELECTS=1"
dan "SELECT ville undersøge for mange poster og ville sandsynligvis tage meget lang tid. Undersøg WHERE delen og brug SET SQL_BIG_SELECTS=1 hvis udtrykket er korrekt"
nla "Het SELECT-statement zou te veel records analyseren en dus veel tijd in beslagnemen. Kijk het WHERE-gedeelte van de query na en kies SET SQL_BIG_SELECTS=1 als het stament in orde is."
eng "The SELECT would examine more than MAX_JOIN_SIZE rows; check your WHERE and use SET SQL_BIG_SELECTS=1 or SET MAX_JOIN_SIZE=# if the SELECT is okay"
@@ -2501,6 +2441,7 @@ ER_TOO_BIG_SELECT 42000
greek "Το SELECT θα εξετάσει μεγάλο αριθμό εγγραφών και πιθανώς θα καθυστερήσει. Παρακαλώ εξετάστε τις παραμέτρους του WHERE και χρησιμοποιείστε SET SQL_BIG_SELECTS=1 αν το SELECT είναι σωστό"
hun "A SELECT tul sok rekordot fog megvizsgalni es nagyon sokaig fog tartani. Ellenorizze a WHERE-t es hasznalja a SET SQL_BIG_SELECTS=1 beallitast, ha a SELECT okay"
ita "La SELECT dovrebbe esaminare troppi record e usare troppo tempo. Controllare la WHERE e usa SET SQL_BIG_SELECTS=1 se e` tutto a posto."
+ jpn "SELECTがMAX_JOIN_SIZEを超える行数を処理しました。WHERE句を確認し、SELECT文に問題がなければ、 SET SQL_BIG_SELECTS=1 または SET MAX_JOIN_SIZE=# を使用して下さい。"
kor "SELECT 명령에서 너무 많은 레코드를 찾기 때문에 많은 시간이 소요됩니다. 따라서 WHERE 문을 점검하거나, 만약 SELECT가 ok되면 SET SQL_BIG_SELECTS=1 옵션을 사용하세요."
nor "SELECT ville undersøke for mange poster og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
norwegian-ny "SELECT ville undersøkje for mange postar og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
@@ -2514,7 +2455,7 @@ ER_TOO_BIG_SELECT 42000
swe "Den angivna frågan skulle läsa mer än MAX_JOIN_SIZE rader. Kontrollera din WHERE och använd SET SQL_BIG_SELECTS=1 eller SET MAX_JOIN_SIZE=# ifall du vill hantera stora joins"
ukr "Запиту SELECT потрібно обробити багато записів, що, певне, займе дуже багато часу. Перевірте ваше WHERE та використовуйте SET SQL_BIG_SELECTS=1, якщо цей запит SELECT є вірним"
ER_UNKNOWN_ERROR
- cze "Nezn-Bámá chyba"
+ cze "Neznámá chyba"
dan "Ukendt fejl"
nla "Onbekende Fout"
eng "Unknown error"
@@ -2524,6 +2465,7 @@ ER_UNKNOWN_ERROR
greek "Προέκυψε άγνωστο λάθος"
hun "Ismeretlen hiba"
ita "Errore sconosciuto"
+ jpn "不明なエラー"
kor "알수 없는 에러입니다."
nor "Ukjent feil"
norwegian-ny "Ukjend feil"
@@ -2533,10 +2475,10 @@ ER_UNKNOWN_ERROR
serbian "Nepoznata greška"
slo "Neznámá chyba"
spa "Error desconocido"
- swe "Oidentifierat fel"
+ swe "Okänt fel"
ukr "Невідома помилка"
ER_UNKNOWN_PROCEDURE 42000
- cze "Nezn-Bámá procedura %-.192s"
+ cze "Neznámá procedura %-.192s"
dan "Ukendt procedure %-.192s"
nla "Onbekende procedure %-.192s"
eng "Unknown procedure '%-.192s'"
@@ -2546,6 +2488,7 @@ ER_UNKNOWN_PROCEDURE 42000
greek "Αγνωστη διαδικασία '%-.192s'"
hun "Ismeretlen eljaras: '%-.192s'"
ita "Procedura '%-.192s' sconosciuta"
+ jpn "'%-.192s' は不明なプロシージャです。"
kor "알수 없는 수행문 : '%-.192s'"
nor "Ukjent prosedyre %-.192s"
norwegian-ny "Ukjend prosedyre %-.192s"
@@ -2559,7 +2502,7 @@ ER_UNKNOWN_PROCEDURE 42000
swe "Okänd procedur: %-.192s"
ukr "Невідома процедура '%-.192s'"
ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
- cze "Chybn-Bý počet parametrů procedury %-.192s"
+ cze "Chybný počet parametrů procedury %-.192s"
dan "Forkert antal parametre til proceduren %-.192s"
nla "Foutief aantal parameters doorgegeven aan procedure %-.192s"
eng "Incorrect parameter count to procedure '%-.192s'"
@@ -2569,6 +2512,7 @@ ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
greek "Λάθος αριθμός παραμέτρων στη διαδικασία '%-.192s'"
hun "Rossz parameter a(z) '%-.192s'eljaras szamitasanal"
ita "Numero di parametri errato per la procedura '%-.192s'"
+ jpn "プロシージャ '%-.192s' へのパラメータ数が不正です。"
kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
nor "Feil parameter antall til prosedyren %-.192s"
norwegian-ny "Feil parameter tal til prosedyra %-.192s"
@@ -2582,7 +2526,7 @@ ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
swe "Felaktigt antal parametrar till procedur %-.192s"
ukr "Хибна кількість параметрів процедури '%-.192s'"
ER_WRONG_PARAMETERS_TO_PROCEDURE
- cze "Chybn-Bé parametry procedury %-.192s"
+ cze "Chybné parametry procedury %-.192s"
dan "Forkert(e) parametre til proceduren %-.192s"
nla "Foutieve parameters voor procedure %-.192s"
eng "Incorrect parameters to procedure '%-.192s'"
@@ -2592,6 +2536,7 @@ ER_WRONG_PARAMETERS_TO_PROCEDURE
greek "Λάθος παράμετροι στην διαδικασία '%-.192s'"
hun "Rossz parameter a(z) '%-.192s' eljarasban"
ita "Parametri errati per la procedura '%-.192s'"
+ jpn "プロシージャ '%-.192s' へのパラメータが不正です。"
kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
nor "Feil parametre til prosedyren %-.192s"
norwegian-ny "Feil parameter til prosedyra %-.192s"
@@ -2605,7 +2550,7 @@ ER_WRONG_PARAMETERS_TO_PROCEDURE
swe "Felaktiga parametrar till procedur %-.192s"
ukr "Хибний параметер процедури '%-.192s'"
ER_UNKNOWN_TABLE 42S02
- cze "Nezn-Bámá tabulka '%-.192s' v %-.32s"
+ cze "Neznámá tabulka '%-.192s' v %-.32s"
dan "Ukendt tabel '%-.192s' i %-.32s"
nla "Onbekende tabel '%-.192s' in %-.32s"
eng "Unknown table '%-.192s' in %-.32s"
@@ -2615,7 +2560,7 @@ ER_UNKNOWN_TABLE 42S02
greek "Αγνωστος πίνακας '%-.192s' σε %-.32s"
hun "Ismeretlen tabla: '%-.192s' %-.32s-ban"
ita "Tabella '%-.192s' sconosciuta in %-.32s"
- jpn "Unknown table '%-.192s' in %-.32s"
+ jpn "'%-.192s' は %-.32s では不明な表です。"
kor "알수 없는 테이블 '%-.192s' (데이타베이스 %-.32s)"
nor "Ukjent tabell '%-.192s' i %-.32s"
norwegian-ny "Ukjend tabell '%-.192s' i %-.32s"
@@ -2629,7 +2574,7 @@ ER_UNKNOWN_TABLE 42S02
swe "Okänd tabell '%-.192s' i '%-.32s'"
ukr "Невідома таблиця '%-.192s' у %-.32s"
ER_FIELD_SPECIFIED_TWICE 42000
- cze "Polo-Bžka '%-.192s' je zadána dvakrát"
+ cze "Položka '%-.192s' je zadána dvakrát"
dan "Feltet '%-.192s' er anvendt to gange"
nla "Veld '%-.192s' is dubbel gespecificeerd"
eng "Column '%-.192s' specified twice"
@@ -2639,6 +2584,7 @@ ER_FIELD_SPECIFIED_TWICE 42000
greek "Το πεδίο '%-.192s' έχει ορισθεί δύο φορές"
hun "A(z) '%-.192s' mezot ketszer definialta"
ita "Campo '%-.192s' specificato 2 volte"
+ jpn "列 '%-.192s' は2回指定されています。"
kor "칼럼 '%-.192s'는 두번 정의되어 있읍니다."
nor "Feltet '%-.192s' er spesifisert to ganger"
norwegian-ny "Feltet '%-.192s' er spesifisert to gangar"
@@ -2652,7 +2598,7 @@ ER_FIELD_SPECIFIED_TWICE 42000
swe "Fält '%-.192s' är redan använt"
ukr "Стовбець '%-.192s' зазначено двічі"
ER_INVALID_GROUP_FUNC_USE
- cze "Nespr-Bávné použití funkce group"
+ cze "Nesprávné použití funkce group"
dan "Forkert brug af grupperings-funktion"
nla "Ongeldig gebruik van GROUP-functie"
eng "Invalid use of group function"
@@ -2662,6 +2608,7 @@ ER_INVALID_GROUP_FUNC_USE
greek "Εσφαλμένη χρήση της group function"
hun "A group funkcio ervenytelen hasznalata"
ita "Uso non valido di una funzione di raggruppamento"
+ jpn "集計関数の使用方法が不正です。"
kor "잘못된 그룹 함수를 사용하였습니다."
por "Uso inválido de função de agrupamento (GROUP)"
rum "Folosire incorecta a functiei group"
@@ -2672,7 +2619,7 @@ ER_INVALID_GROUP_FUNC_USE
swe "Felaktig användning av SQL grupp function"
ukr "Хибне використання функції групування"
ER_UNSUPPORTED_EXTENSION 42000
- cze "Tabulka '%-.192s' pou-Bžívá rozšíření, které v této verzi MariaDB není"
+ cze "Tabulka '%-.192s' používá rozšíření, které v této verzi MariaDB není"
dan "Tabellen '%-.192s' bruger et filtypenavn som ikke findes i denne MariaDB version"
nla "Tabel '%-.192s' gebruikt een extensie, die niet in deze MariaDB-versie voorkomt."
eng "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
@@ -2682,6 +2629,7 @@ ER_UNSUPPORTED_EXTENSION 42000
greek "Ο πίνακς '%-.192s' χρησιμοποιεί κάποιο extension που δεν υπάρχει στην έκδοση αυτή της MariaDB"
hun "A(z) '%-.192s' tabla olyan bovitest hasznal, amely nem letezik ebben a MariaDB versioban."
ita "La tabella '%-.192s' usa un'estensione che non esiste in questa versione di MariaDB"
+ jpn "表 '%-.192s' は、このMariaDBバージョンには無い機能を使用しています。"
kor "테이블 '%-.192s'는 확장명령을 이용하지만 현재의 MariaDB 버젼에서는 존재하지 않습니다."
nor "Table '%-.192s' uses a extension that doesn't exist in this MariaDB version"
norwegian-ny "Table '%-.192s' uses a extension that doesn't exist in this MariaDB version"
@@ -2695,18 +2643,17 @@ ER_UNSUPPORTED_EXTENSION 42000
swe "Tabell '%-.192s' har en extension som inte finns i denna version av MariaDB"
ukr "Таблиця '%-.192s' використовує розширення, що не існує у цій версії MariaDB"
ER_TABLE_MUST_HAVE_COLUMNS 42000
- cze "Tabulka mus-Bí mít alespoň jeden sloupec"
+ cze "Tabulka musí mít alespoň jeden sloupec"
dan "En tabel skal have mindst een kolonne"
nla "Een tabel moet minstens 1 kolom bevatten"
eng "A table must have at least 1 column"
- jps "テーブルは最低 1 個の column が必要です",
est "Tabelis peab olema vähemalt üks tulp"
fre "Une table doit comporter au moins une colonne"
ger "Eine Tabelle muss mindestens eine Spalte besitzen"
greek "Ενας πίνακας πρέπει να έχει τουλάχιστον ένα πεδίο"
hun "A tablanak legalabb egy oszlopot tartalmazni kell"
ita "Una tabella deve avere almeno 1 colonna"
- jpn "テーブルは最低 1 個の column が必要です"
+ jpn "表には最低でも1個の列が必要です。"
kor "하나의 테이블에서는 적어도 하나의 칼럼이 존재하여야 합니다."
por "Uma tabela tem que ter pelo menos uma (1) coluna"
rum "O tabela trebuie sa aiba cel putin o coloana"
@@ -2717,18 +2664,17 @@ ER_TABLE_MUST_HAVE_COLUMNS 42000
swe "Tabeller måste ha minst 1 kolumn"
ukr "Таблиця повинна мати хочаб один стовбець"
ER_RECORD_FILE_FULL
- cze "Tabulka '%-.192s' je pln-Bá"
+ cze "Tabulka '%-.192s' je plná"
dan "Tabellen '%-.192s' er fuld"
nla "De tabel '%-.192s' is vol"
eng "The table '%-.192s' is full"
- jps "table '%-.192s' はいっぱいです",
est "Tabel '%-.192s' on täis"
fre "La table '%-.192s' est pleine"
ger "Tabelle '%-.192s' ist voll"
greek "Ο πίνακας '%-.192s' είναι γεμάτος"
hun "A '%-.192s' tabla megtelt"
ita "La tabella '%-.192s' e` piena"
- jpn "table '%-.192s' はいっぱいです"
+ jpn "表 '%-.192s' は満杯です。"
kor "테이블 '%-.192s'가 full났습니다. "
por "Tabela '%-.192s' está cheia"
rum "Tabela '%-.192s' e plina"
@@ -2739,18 +2685,17 @@ ER_RECORD_FILE_FULL
swe "Tabellen '%-.192s' är full"
ukr "Таблиця '%-.192s' заповнена"
ER_UNKNOWN_CHARACTER_SET 42000
- cze "Nezn-Bámá znaková sada: '%-.64s'"
+ cze "Neznámá znaková sada: '%-.64s'"
dan "Ukendt tegnsæt: '%-.64s'"
nla "Onbekende character set: '%-.64s'"
eng "Unknown character set: '%-.64s'"
- jps "character set '%-.64s' はサポートしていません",
est "Vigane kooditabel '%-.64s'"
fre "Jeu de caractères inconnu: '%-.64s'"
ger "Unbekannter Zeichensatz: '%-.64s'"
greek "Αγνωστο character set: '%-.64s'"
hun "Ervenytelen karakterkeszlet: '%-.64s'"
ita "Set di caratteri '%-.64s' sconosciuto"
- jpn "character set '%-.64s' はサポートしていません"
+ jpn "不明な文字コードセット: '%-.64s'"
kor "알수없는 언어 Set: '%-.64s'"
por "Conjunto de caracteres '%-.64s' desconhecido"
rum "Set de caractere invalid: '%-.64s'"
@@ -2761,18 +2706,17 @@ ER_UNKNOWN_CHARACTER_SET 42000
swe "Okänd teckenuppsättning: '%-.64s'"
ukr "Невідома кодова таблиця: '%-.64s'"
ER_TOO_MANY_TABLES
- cze "P-Bříliš mnoho tabulek, MariaDB jich může mít v joinu jen %d"
+ cze "Příliš mnoho tabulek, MariaDB jich může mít v joinu jen %d"
dan "For mange tabeller. MariaDB kan kun bruge %d tabeller i et join"
nla "Teveel tabellen. MariaDB kan slechts %d tabellen in een join bevatten"
eng "Too many tables; MariaDB can only use %d tables in a join"
- jps "テーブルが多すぎます; MariaDB can only use %d tables in a join",
est "Liiga palju tabeleid. MariaDB suudab JOINiga ühendada kuni %d tabelit"
fre "Trop de tables. MariaDB ne peut utiliser que %d tables dans un JOIN"
ger "Zu viele Tabellen. MariaDB kann in einem Join maximal %d Tabellen verwenden"
greek "Πολύ μεγάλος αριθμός πινάκων. Η MariaDB μπορεί να χρησιμοποιήσει %d πίνακες σε διαδικασία join"
hun "Tul sok tabla. A MariaDB csak %d tablat tud kezelni osszefuzeskor"
ita "Troppe tabelle. MariaDB puo` usare solo %d tabelle in una join"
- jpn "テーブルが多すぎます; MariaDB can only use %d tables in a join"
+ jpn "表が多すぎます。MariaDBがJOINできる表は %d 個までです。"
kor "너무 많은 테이블이 Join되었습니다. MariaDB에서는 JOIN시 %d개의 테이블만 사용할 수 있습니다."
por "Tabelas demais. O MariaDB pode usar somente %d tabelas em uma junção (JOIN)"
rum "Prea multe tabele. MariaDB nu poate folosi mai mult de %d tabele intr-un join"
@@ -2783,18 +2727,17 @@ ER_TOO_MANY_TABLES
swe "För många tabeller. MariaDB can ha högst %d tabeller i en och samma join"
ukr "Забагато таблиць. MariaDB може використовувати лише %d таблиць у об'єднанні"
ER_TOO_MANY_FIELDS
- cze "P-Bříliš mnoho položek"
+ cze "Příliš mnoho položek"
dan "For mange felter"
nla "Te veel velden"
eng "Too many columns"
- jps "column が多すぎます",
est "Liiga palju tulpasid"
fre "Trop de champs"
ger "Zu viele Felder"
greek "Πολύ μεγάλος αριθμός πεδίων"
hun "Tul sok mezo"
ita "Troppi campi"
- jpn "column が多すぎます"
+ jpn "列が多すぎます。"
kor "칼럼이 너무 많습니다."
por "Colunas demais"
rum "Prea multe coloane"
@@ -2805,18 +2748,17 @@ ER_TOO_MANY_FIELDS
swe "För många fält"
ukr "Забагато стовбців"
ER_TOO_BIG_ROWSIZE 42000
- cze "-BŘádek je příliš velký. Maximální velikost řádku, nepočítaje položky blob, je %ld. Musíte změnit některé položky na blob"
+ cze "Řádek je příliš velký. Maximální velikost řádku, nepočítaje položky blob, je %ld. Musíte změnit některé položky na blob"
dan "For store poster. Max post størrelse, uden BLOB's, er %ld. Du må lave nogle felter til BLOB's"
nla "Rij-grootte is groter dan toegestaan. Maximale rij grootte, blobs niet meegeteld, is %ld. U dient sommige velden in blobs te veranderen."
eng "Row size too large. The maximum row size for the used table type, not counting BLOBs, is %ld. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs"
- jps "row size が大きすぎます. BLOB を含まない場合の row size の最大は %ld です. いくつかの field を BLOB に変えてください.",
est "Liiga pikk kirje. Kirje maksimumpikkus arvestamata BLOB-tüüpi välju on %ld. Muuda mõned väljad BLOB-tüüpi väljadeks"
fre "Ligne trop grande. Le taille maximale d'une ligne, sauf les BLOBs, est %ld. Changez le type de quelques colonnes en BLOB"
ger "Zeilenlänge zu groß. Die maximale Zeilenlänge für den verwendeten Tabellentyp (ohne BLOB-Felder) beträgt %ld. Einige Felder müssen in BLOB oder TEXT umgewandelt werden"
greek "Πολύ μεγάλο μέγεθος εγγραφής. Το μέγιστο μέγεθος εγγραφής, χωρίς να υπολογίζονται τα blobs, είναι %ld. Πρέπει να ορίσετε κάποια πεδία σαν blobs"
hun "Tul nagy sormeret. A maximalis sormeret (nem szamolva a blob objektumokat) %ld. Nehany mezot meg kell valtoztatnia"
ita "Riga troppo grande. La massima grandezza di una riga, non contando i BLOB, e` %ld. Devi cambiare alcuni campi in BLOB"
- jpn "row size が大きすぎます. BLOB を含まない場合の row size の最大は %ld です. いくつかの field を BLOB に変えてください."
+ jpn "行サイズが大きすぎます。この表の最大行サイズは BLOB を含まずに %ld です。格納時のオーバーヘッドも含まれます(マニュアルを確認してください)。列をTEXTまたはBLOBに変更する必要があります。"
kor "너무 큰 row 사이즈입니다. BLOB를 계산하지 않고 최대 row 사이즈는 %ld입니다. 얼마간의 필드들을 BLOB로 바꾸셔야 겠군요.."
por "Tamanho de linha grande demais. O máximo tamanho de linha, não contando BLOBs, é %ld. Você tem que mudar alguns campos para BLOBs"
rum "Marimea liniei (row) prea mare. Marimea maxima a liniei, excluzind BLOB-urile este de %ld. Trebuie sa schimbati unele cimpuri in BLOB-uri"
@@ -2827,17 +2769,16 @@ ER_TOO_BIG_ROWSIZE 42000
swe "För stor total radlängd. Den högst tillåtna radlängden, förutom BLOBs, är %ld. Ändra några av dina fält till BLOB"
ukr "Задовга строка. Найбільшою довжиною строки, не рахуючи BLOB, є %ld. Вам потрібно привести деякі стовбці до типу BLOB"
ER_STACK_OVERRUN
- cze "P-Břetečení zásobníku threadu: použito %ld z %ld. Použijte 'mysqld --thread_stack=#' k zadání většího zásobníku"
+ cze "Přetečení zásobníku threadu: použito %ld z %ld. Použijte 'mysqld --thread_stack=#' k zadání většího zásobníku"
dan "Thread stack brugt: Brugt: %ld af en %ld stak. Brug 'mysqld --thread_stack=#' for at allokere en større stak om nødvendigt"
nla "Thread stapel overrun: Gebruikte: %ld van een %ld stack. Gebruik 'mysqld --thread_stack=#' om een grotere stapel te definieren (indien noodzakelijk)."
eng "Thread stack overrun: Used: %ld of a %ld stack. Use 'mysqld --thread_stack=#' to specify a bigger stack if needed"
- jps "Thread stack overrun: Used: %ld of a %ld stack. スタック領域を多くとりたい場合、'mysqld --thread_stack=#' と指定してください",
fre "Débordement de la pile des tâches (Thread stack). Utilisées: %ld pour une pile de %ld. Essayez 'mysqld --thread_stack=#' pour indiquer une plus grande valeur"
ger "Thread-Stack-Überlauf. Benutzt: %ld von %ld Stack. 'mysqld --thread_stack=#' verwenden, um bei Bedarf einen größeren Stack anzulegen"
greek "Stack overrun στο thread: Used: %ld of a %ld stack. Παρακαλώ χρησιμοποιείστε 'mysqld --thread_stack=#' για να ορίσετε ένα μεγαλύτερο stack αν χρειάζεται"
hun "Thread verem tullepes: Used: %ld of a %ld stack. Hasznalja a 'mysqld --thread_stack=#' nagyobb verem definialasahoz"
ita "Thread stack overrun: Usati: %ld di uno stack di %ld. Usa 'mysqld --thread_stack=#' per specificare uno stack piu` grande."
- jpn "Thread stack overrun: Used: %ld of a %ld stack. スタック領域を多くとりたい場合、'mysqld --thread_stack=#' と指定してください"
+ jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld)。必要に応じて、より大きい値で 'mysqld --thread_stack=#' の指定をしてください。"
kor "쓰레드 스택이 넘쳤습니다. 사용: %ld개 스택: %ld개. 만약 필요시 더큰 스택을 원할때에는 'mysqld --thread_stack=#' 를 정의하세요"
por "Estouro da pilha do 'thread'. Usados %ld de uma pilha de %ld. Use 'mysqld --thread_stack=#' para especificar uma pilha maior, se necessário"
rum "Stack-ul thread-ului a fost depasit (prea mic): Folositi: %ld intr-un stack de %ld. Folositi 'mysqld --thread_stack=#' ca sa specifici un stack mai mare"
@@ -2848,7 +2789,7 @@ ER_STACK_OVERRUN
swe "Trådstacken tog slut: Har använt %ld av %ld bytes. Använd 'mysqld --thread_stack=#' ifall du behöver en större stack"
ukr "Стек гілок переповнено: Використано: %ld з %ld. Використовуйте 'mysqld --thread_stack=#' аби зазначити більший стек, якщо необхідно"
ER_WRONG_OUTER_JOIN 42000
- cze "V OUTER JOIN byl nalezen k-Břížový odkaz. Prověřte ON podmínky"
+ cze "V OUTER JOIN byl nalezen křížový odkaz. Prověřte ON podmínky"
dan "Krydsreferencer fundet i OUTER JOIN; check dine ON conditions"
nla "Gekruiste afhankelijkheid gevonden in OUTER JOIN. Controleer uw ON-conditions"
eng "Cross dependency found in OUTER JOIN; examine your ON conditions"
@@ -2858,6 +2799,7 @@ ER_WRONG_OUTER_JOIN 42000
greek "Cross dependency βρέθηκε σε OUTER JOIN. Παρακαλώ εξετάστε τις συνθήκες που θέσατε στο ON"
hun "Keresztfuggoseg van az OUTER JOIN-ban. Ellenorizze az ON felteteleket"
ita "Trovata una dipendenza incrociata nella OUTER JOIN. Controlla le condizioni ON"
+ jpn "OUTER JOINに相互依存が見つかりました。ON句の条件を確認して下さい。"
por "Dependência cruzada encontrada em junção externa (OUTER JOIN); examine as condições utilizadas nas cláusulas 'ON'"
rum "Dependinta incrucisata (cross dependency) gasita in OUTER JOIN. Examinati conditiile ON"
rus "В OUTER JOIN обнаружена перекрестная зависимость. Внимательно проанализируйте свои условия ON"
@@ -2870,18 +2812,17 @@ ER_NULL_COLUMN_IN_INDEX 42000
eng "Table handler doesn't support NULL in given index. Please change column '%-.192s' to be NOT NULL or use another handler"
swe "Tabell hanteraren kan inte indexera NULL kolumner för den givna index typen. Ändra '%-.192s' till NOT NULL eller använd en annan hanterare"
ER_CANT_FIND_UDF
- cze "Nemohu na-Bčíst funkci '%-.192s'"
+ cze "Nemohu načíst funkci '%-.192s'"
dan "Kan ikke læse funktionen '%-.192s'"
nla "Kan functie '%-.192s' niet laden"
eng "Can't load function '%-.192s'"
- jps "function '%-.192s' を ロードできません",
est "Ei suuda avada funktsiooni '%-.192s'"
fre "Imposible de charger la fonction '%-.192s'"
ger "Kann Funktion '%-.192s' nicht laden"
greek "Δεν είναι δυνατή η διαδικασία load για τη συνάρτηση '%-.192s'"
hun "A(z) '%-.192s' fuggveny nem toltheto be"
ita "Impossibile caricare la funzione '%-.192s'"
- jpn "function '%-.192s' を ロードできません"
+ jpn "関数 '%-.192s' をロードできません。"
kor "'%-.192s' 함수를 로드하지 못했습니다."
por "Não pode carregar a função '%-.192s'"
rum "Nu pot incarca functia '%-.192s'"
@@ -2896,14 +2837,13 @@ ER_CANT_INITIALIZE_UDF
dan "Kan ikke starte funktionen '%-.192s'; %-.80s"
nla "Kan functie '%-.192s' niet initialiseren; %-.80s"
eng "Can't initialize function '%-.192s'; %-.80s"
- jps "function '%-.192s' を初期化できません; %-.80s",
est "Ei suuda algväärtustada funktsiooni '%-.192s'; %-.80s"
fre "Impossible d'initialiser la fonction '%-.192s'; %-.80s"
ger "Kann Funktion '%-.192s' nicht initialisieren: %-.80s"
greek "Δεν είναι δυνατή η έναρξη της συνάρτησης '%-.192s'; %-.80s"
hun "A(z) '%-.192s' fuggveny nem inicializalhato; %-.80s"
ita "Impossibile inizializzare la funzione '%-.192s'; %-.80s"
- jpn "function '%-.192s' を初期化できません; %-.80s"
+ jpn "関数 '%-.192s' を初期化できません。; %-.80s"
kor "'%-.192s' 함수를 초기화 하지 못했습니다.; %-.80s"
por "Não pode inicializar a função '%-.192s' - '%-.80s'"
rum "Nu pot initializa functia '%-.192s'; %-.80s"
@@ -2914,18 +2854,17 @@ ER_CANT_INITIALIZE_UDF
swe "Kan inte initialisera funktionen '%-.192s'; '%-.80s'"
ukr "Не можу ініціалізувати функцію '%-.192s'; %-.80s"
ER_UDF_NO_PATHS
- cze "Pro sd-Bílenou knihovnu nejsou povoleny cesty"
+ cze "Pro sdílenou knihovnu nejsou povoleny cesty"
dan "Angivelse af sti ikke tilladt for delt bibliotek"
nla "Geen pad toegestaan voor shared library"
eng "No paths allowed for shared library"
- jps "shared library へのパスが通っていません",
est "Teegi nimes ei tohi olla kataloogi"
fre "Chemin interdit pour les bibliothèques partagées"
ger "Keine Pfade gestattet für Shared Library"
greek "Δεν βρέθηκαν paths για την shared library"
hun "Nincs ut a megosztott konyvtarakhoz (shared library)"
ita "Non sono ammessi path per le librerie condivisa"
- jpn "shared library へのパスが通っていません"
+ jpn "共有ライブラリにはパスを指定できません。"
kor "공유 라이버러리를 위한 패스가 정의되어 있지 않습니다."
por "Não há caminhos (paths) permitidos para biblioteca compartilhada"
rum "Nici un paths nu e permis pentru o librarie shared"
@@ -2936,18 +2875,17 @@ ER_UDF_NO_PATHS
swe "Man får inte ange sökväg för dynamiska bibliotek"
ukr "Не дозволено використовувати путі для розділюваних бібліотек"
ER_UDF_EXISTS
- cze "Funkce '%-.192s' ji-Bž existuje"
+ cze "Funkce '%-.192s' již existuje"
dan "Funktionen '%-.192s' findes allerede"
nla "Functie '%-.192s' bestaat reeds"
eng "Function '%-.192s' already exists"
- jps "Function '%-.192s' は既に定義されています",
est "Funktsioon '%-.192s' juba eksisteerib"
fre "La fonction '%-.192s' existe déjà"
ger "Funktion '%-.192s' existiert schon"
greek "Η συνάρτηση '%-.192s' υπάρχει ήδη"
hun "A '%-.192s' fuggveny mar letezik"
ita "La funzione '%-.192s' esiste gia`"
- jpn "Function '%-.192s' は既に定義されています"
+ jpn "関数 '%-.192s' はすでに定義されています。"
kor "'%-.192s' 함수는 이미 존재합니다."
por "Função '%-.192s' já existe"
rum "Functia '%-.192s' exista deja"
@@ -2958,18 +2896,17 @@ ER_UDF_EXISTS
swe "Funktionen '%-.192s' finns redan"
ukr "Функція '%-.192s' вже існує"
ER_CANT_OPEN_LIBRARY
- cze "Nemohu otev-Břít sdílenou knihovnu '%-.192s' (errno: %d %-.128s)"
+ cze "Nemohu otevřít sdílenou knihovnu '%-.192s' (errno: %d %-.128s)"
dan "Kan ikke åbne delt bibliotek '%-.192s' (errno: %d %-.128s)"
nla "Kan shared library '%-.192s' niet openen (Errcode: %d %-.128s)"
eng "Can't open shared library '%-.192s' (errno: %d %-.128s)"
- jps "shared library '%-.192s' を開く事ができません (errno: %d %-.128s)",
est "Ei suuda avada jagatud teeki '%-.192s' (veakood: %d %-.128s)"
fre "Impossible d'ouvrir la bibliothèque partagée '%-.192s' (errno: %d %-.128s)"
ger "Kann Shared Library '%-.192s' nicht öffnen (Fehler: %d %-.128s)"
greek "Δεν είναι δυνατή η ανάγνωση της shared library '%-.192s' (κωδικός λάθους: %d %-.128s)"
hun "A(z) '%-.192s' megosztott konyvtar nem hasznalhato (hibakod: %d %-.128s)"
ita "Impossibile aprire la libreria condivisa '%-.192s' (errno: %d %-.128s)"
- jpn "shared library '%-.192s' を開く事ができません (errno: %d %-.128s)"
+ jpn "共有ライブラリ '%-.192s' を開く事ができません。(エラー番号: %d %-.128s)"
kor "'%-.192s' 공유 라이버러리를 열수 없습니다.(에러번호: %d %-.128s)"
nor "Can't open shared library '%-.192s' (errno: %d %-.128s)"
norwegian-ny "Can't open shared library '%-.192s' (errno: %d %-.128s)"
@@ -2983,18 +2920,17 @@ ER_CANT_OPEN_LIBRARY
swe "Kan inte öppna det dynamiska biblioteket '%-.192s' (Felkod: %d %-.128s)"
ukr "Не можу відкрити розділювану бібліотеку '%-.192s' (помилка: %d %-.128s)"
ER_CANT_FIND_DL_ENTRY
- cze "Nemohu naj-Bít funkci '%-.128s' v knihovně"
+ cze "Nemohu najít funkci '%-.128s' v knihovně"
dan "Kan ikke finde funktionen '%-.128s' i bibliotek"
nla "Kan functie '%-.128s' niet in library vinden"
eng "Can't find symbol '%-.128s' in library"
- jps "function '%-.128s' をライブラリー中に見付ける事ができません",
est "Ei leia funktsiooni '%-.128s' antud teegis"
fre "Impossible de trouver la fonction '%-.128s' dans la bibliothèque"
ger "Kann Funktion '%-.128s' in der Library nicht finden"
greek "Δεν είναι δυνατή η ανεύρεση της συνάρτησης '%-.128s' στην βιβλιοθήκη"
hun "A(z) '%-.128s' fuggveny nem talalhato a konyvtarban"
ita "Impossibile trovare la funzione '%-.128s' nella libreria"
- jpn "function '%-.128s' をライブラリー中に見付ける事ができません"
+ jpn "関数 '%-.128s' は共有ライブラリー中にありません。"
kor "라이버러리에서 '%-.128s' 함수를 찾을 수 없습니다."
por "Não pode encontrar a função '%-.128s' na biblioteca"
rum "Nu pot gasi functia '%-.128s' in libraria"
@@ -3005,18 +2941,17 @@ ER_CANT_FIND_DL_ENTRY
swe "Hittar inte funktionen '%-.128s' in det dynamiska biblioteket"
ukr "Не можу знайти функцію '%-.128s' у бібліотеці"
ER_FUNCTION_NOT_DEFINED
- cze "Funkce '%-.192s' nen-Bí definována"
+ cze "Funkce '%-.192s' není definována"
dan "Funktionen '%-.192s' er ikke defineret"
nla "Functie '%-.192s' is niet gedefinieerd"
eng "Function '%-.192s' is not defined"
- jps "Function '%-.192s' は定義されていません",
est "Funktsioon '%-.192s' ei ole defineeritud"
fre "La fonction '%-.192s' n'est pas définie"
ger "Funktion '%-.192s' ist nicht definiert"
greek "Η συνάρτηση '%-.192s' δεν έχει ορισθεί"
hun "A '%-.192s' fuggveny nem definialt"
ita "La funzione '%-.192s' non e` definita"
- jpn "Function '%-.192s' は定義されていません"
+ jpn "関数 '%-.192s' は定義されていません。"
kor "'%-.192s' 함수가 정의되어 있지 않습니다."
por "Função '%-.192s' não está definida"
rum "Functia '%-.192s' nu e definita"
@@ -3027,18 +2962,17 @@ ER_FUNCTION_NOT_DEFINED
swe "Funktionen '%-.192s' är inte definierad"
ukr "Функцію '%-.192s' не визначено"
ER_HOST_IS_BLOCKED
- cze "Stroj '%-.64s' je zablokov-Bán kvůli mnoha chybám při připojování. Odblokujete použitím 'mysqladmin flush-hosts'"
+ cze "Stroj '%-.64s' je zablokován kvůli mnoha chybám při připojování. Odblokujete použitím 'mysqladmin flush-hosts'"
dan "Værten '%-.64s' er blokeret på grund af mange fejlforespørgsler. Lås op med 'mysqladmin flush-hosts'"
nla "Host '%-.64s' is geblokkeeerd vanwege te veel verbindings fouten. Deblokkeer met 'mysqladmin flush-hosts'"
eng "Host '%-.64s' is blocked because of many connection errors; unblock with 'mysqladmin flush-hosts'"
- jps "Host '%-.64s' は many connection error のため、拒否されました. 'mysqladmin flush-hosts' で解除してください",
est "Masin '%-.64s' on blokeeritud hulgaliste ühendusvigade tõttu. Blokeeringu saab tühistada 'mysqladmin flush-hosts' käsuga"
fre "L'hôte '%-.64s' est bloqué à cause d'un trop grand nombre d'erreur de connexion. Débloquer le par 'mysqladmin flush-hosts'"
ger "Host '%-.64s' blockiert wegen zu vieler Verbindungsfehler. Aufheben der Blockierung mit 'mysqladmin flush-hosts'"
greek "Ο υπολογιστής '%-.64s' έχει αποκλεισθεί λόγω πολλαπλών λαθών σύνδεσης. Προσπαθήστε να διορώσετε με 'mysqladmin flush-hosts'"
hun "A '%-.64s' host blokkolodott, tul sok kapcsolodasi hiba miatt. Hasznalja a 'mysqladmin flush-hosts' parancsot"
ita "Sistema '%-.64s' bloccato a causa di troppi errori di connessione. Per sbloccarlo: 'mysqladmin flush-hosts'"
- jpn "Host '%-.64s' は many connection error のため、拒否されました. 'mysqladmin flush-hosts' で解除してください"
+ jpn "接続エラーが多いため、ホスト '%-.64s' は拒否されました。'mysqladmin flush-hosts' で解除できます。"
kor "너무 많은 연결오류로 인하여 호스트 '%-.64s'는 블락되었습니다. 'mysqladmin flush-hosts'를 이용하여 블락을 해제하세요"
por "'Host' '%-.64s' está bloqueado devido a muitos erros de conexão. Desbloqueie com 'mysqladmin flush-hosts'"
rum "Host-ul '%-.64s' e blocat din cauza multelor erori de conectie. Poti deploca folosind 'mysqladmin flush-hosts'"
@@ -3048,18 +2982,17 @@ ER_HOST_IS_BLOCKED
swe "Denna dator, '%-.64s', är blockerad pga många felaktig paket. Gör 'mysqladmin flush-hosts' för att ta bort alla blockeringarna"
ukr "Хост '%-.64s' заблоковано з причини великої кількості помилок з'єднання. Для розблокування використовуйте 'mysqladmin flush-hosts'"
ER_HOST_NOT_PRIVILEGED
- cze "Stroj '%-.64s' nem-Bá povoleno se k tomuto MariaDB serveru připojit"
+ cze "Stroj '%-.64s' nemá povoleno se k tomuto MariaDB serveru připojit"
dan "Værten '%-.64s' kan ikke tilkoble denne MariaDB-server"
nla "Het is host '%-.64s' is niet toegestaan verbinding te maken met deze MariaDB server"
eng "Host '%-.64s' is not allowed to connect to this MariaDB server"
- jps "Host '%-.64s' は MariaDB server に接続を許可されていません",
est "Masinal '%-.64s' puudub ligipääs sellele MariaDB serverile"
fre "Le hôte '%-.64s' n'est pas authorisé à se connecter à ce serveur MariaDB"
ger "Host '%-.64s' hat keine Berechtigung, sich mit diesem MariaDB-Server zu verbinden"
greek "Ο υπολογιστής '%-.64s' δεν έχει δικαίωμα σύνδεσης με τον MariaDB server"
hun "A '%-.64s' host szamara nem engedelyezett a kapcsolodas ehhez a MariaDB szerverhez"
ita "Al sistema '%-.64s' non e` consentita la connessione a questo server MariaDB"
- jpn "Host '%-.64s' は MariaDB server に接続を許可されていません"
+ jpn "ホスト '%-.64s' からのこの MariaDB server への接続は許可されていません。"
kor "'%-.64s' 호스트는 이 MariaDB서버에 접속할 허가를 받지 못했습니다."
por "'Host' '%-.64s' não tem permissão para se conectar com este servidor MariaDB"
rum "Host-ul '%-.64s' nu este permis a se conecta la aceste server MariaDB"
@@ -3069,18 +3002,17 @@ ER_HOST_NOT_PRIVILEGED
swe "Denna dator, '%-.64s', har inte privileger att använda denna MariaDB server"
ukr "Хосту '%-.64s' не доволено зв'язуватись з цим сервером MariaDB"
ER_PASSWORD_ANONYMOUS_USER 42000
- cze "Pou-Bžíváte MariaDB jako anonymní uživatel a anonymní uživatelé nemají povoleno měnit hesla"
+ cze "Používáte MariaDB jako anonymní uživatel a anonymní uživatelé nemají povoleno měnit hesla"
dan "Du bruger MariaDB som anonym bruger. Anonyme brugere må ikke ændre adgangskoder"
nla "U gebruikt MariaDB als anonieme gebruiker en deze mogen geen wachtwoorden wijzigen"
eng "You are using MariaDB as an anonymous user and anonymous users are not allowed to change passwords"
- jps "MariaDB を anonymous users で使用している状態では、パスワードの変更はできません",
est "Te kasutate MariaDB-i anonüümse kasutajana, kelledel pole parooli muutmise õigust"
fre "Vous utilisez un utilisateur anonyme et les utilisateurs anonymes ne sont pas autorisés à changer les mots de passe"
ger "Sie benutzen MariaDB als anonymer Benutzer und dürfen daher keine Passwörter ändern"
greek "Χρησιμοποιείτε την MariaDB σαν anonymous user και έτσι δεν μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
hun "Nevtelen (anonymous) felhasznalokent nem negedelyezett a jelszovaltoztatas"
ita "Impossibile cambiare la password usando MariaDB come utente anonimo"
- jpn "MariaDB を anonymous users で使用している状態では、パスワードの変更はできません"
+ jpn "MariaDB を匿名ユーザーで使用しているので、パスワードの変更はできません。"
kor "당신은 MariaDB서버에 익명의 사용자로 접속을 하셨습니다.익명의 사용자는 암호를 변경할 수 없습니다."
por "Você está usando o MariaDB como usuário anônimo e usuários anônimos não têm permissão para mudar senhas"
rum "Dumneavoastra folositi MariaDB ca un utilizator anonim si utilizatorii anonimi nu au voie sa schime parolele"
@@ -3090,18 +3022,17 @@ ER_PASSWORD_ANONYMOUS_USER 42000
swe "Du använder MariaDB som en anonym användare och som sådan får du inte ändra ditt lösenord"
ukr "Ви використовуєте MariaDB як анонімний користувач, тому вам не дозволено змінювати паролі"
ER_PASSWORD_NOT_ALLOWED 42000
- cze "Na zm-Běnu hesel ostatním musíte mít právo provést update tabulek v databázi mysql"
+ cze "Na změnu hesel ostatním musíte mít právo provést update tabulek v databázi mysql"
dan "Du skal have tilladelse til at opdatere tabeller i MariaDB databasen for at ændre andres adgangskoder"
nla "U moet tabel update priveleges hebben in de mysql database om wachtwoorden voor anderen te mogen wijzigen"
eng "You must have privileges to update tables in the mysql database to be able to change passwords for others"
- jps "他のユーザーのパスワードを変更するためには, mysql データベースに対して update の許可がなければなりません.",
est "Teiste paroolide muutmiseks on nõutav tabelite muutmisõigus 'mysql' andmebaasis"
fre "Vous devez avoir le privilège update sur les tables de la base de donnée mysql pour pouvoir changer les mots de passe des autres"
ger "Sie benötigen die Berechtigung zum Aktualisieren von Tabellen in der Datenbank 'mysql', um die Passwörter anderer Benutzer ändern zu können"
greek "Πρέπει να έχετε δικαίωμα διόρθωσης πινάκων (update) στη βάση δεδομένων mysql για να μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
hun "Onnek tabla-update joggal kell rendelkeznie a mysql adatbazisban masok jelszavanak megvaltoztatasahoz"
ita "E` necessario il privilegio di update sulle tabelle del database mysql per cambiare le password per gli altri utenti"
- jpn "他のユーザーのパスワードを変更するためには, mysql データベースに対して update の許可がなければなりません."
+ jpn "他のユーザーのパスワードを変更するためには、mysqlデータベースの表を更新する権限が必要です。"
kor "당신은 다른사용자들의 암호를 변경할 수 있도록 데이타베이스 변경권한을 가져야 합니다."
por "Você deve ter privilégios para atualizar tabelas no banco de dados mysql para ser capaz de mudar a senha de outros"
rum "Trebuie sa aveti privilegii sa actualizati tabelele in bazele de date mysql ca sa puteti sa schimati parolele altora"
@@ -3111,7 +3042,7 @@ ER_PASSWORD_NOT_ALLOWED 42000
swe "För att ändra lösenord för andra måste du ha rättigheter att uppdatera mysql-databasen"
ukr "Ви повині мати право на оновлення таблиць у базі данних mysql, аби мати можливість змінювати пароль іншим"
ER_PASSWORD_NO_MATCH 42000
- cze "V tabulce user nen-Bí žádný odpovídající řádek"
+ cze "V tabulce user není žádný odpovídající řádek"
dan "Kan ikke finde nogen tilsvarende poster i bruger tabellen"
nla "Kan geen enkele passende rij vinden in de gebruikers tabel"
eng "Can't find any matching row in the user table"
@@ -3121,6 +3052,7 @@ ER_PASSWORD_NO_MATCH 42000
greek "Δεν είναι δυνατή η ανεύρεση της αντίστοιχης εγγραφής στον πίνακα των χρηστών"
hun "Nincs megegyezo sor a user tablaban"
ita "Impossibile trovare la riga corrispondente nella tabella user"
+ jpn "ユーザーテーブルに該当するレコードが見つかりません。"
kor "사용자 테이블에서 일치하는 것을 찾을 수 없읍니다."
por "Não pode encontrar nenhuma linha que combine na tabela usuário (user table)"
rum "Nu pot gasi nici o linie corespunzatoare in tabela utilizatorului"
@@ -3130,17 +3062,16 @@ ER_PASSWORD_NO_MATCH 42000
swe "Hittade inte användaren i 'user'-tabellen"
ukr "Не можу знайти відповідних записів у таблиці користувача"
ER_UPDATE_INFO
- cze "Nalezen-Bých řádků: %ld Změněno: %ld Varování: %ld"
+ cze "Nalezených řádků: %ld Změněno: %ld Varování: %ld"
dan "Poster fundet: %ld Ændret: %ld Advarsler: %ld"
nla "Passende rijen: %ld Gewijzigd: %ld Waarschuwingen: %ld"
eng "Rows matched: %ld Changed: %ld Warnings: %ld"
- jps "一致数(Rows matched): %ld 変更: %ld Warnings: %ld",
est "Sobinud kirjeid: %ld Muudetud: %ld Hoiatusi: %ld"
fre "Enregistrements correspondants: %ld Modifiés: %ld Warnings: %ld"
ger "Datensätze gefunden: %ld Geändert: %ld Warnungen: %ld"
hun "Megegyezo sorok szama: %ld Valtozott: %ld Warnings: %ld"
ita "Rows riconosciute: %ld Cambiate: %ld Warnings: %ld"
- jpn "一致数(Rows matched): %ld 変更: %ld Warnings: %ld"
+ jpn "該当した行: %ld 変更: %ld 警告: %ld"
kor "일치하는 Rows : %ld개 변경됨: %ld개 경고: %ld개"
por "Linhas que combinaram: %ld - Alteradas: %ld - Avisos: %ld"
rum "Linii identificate (matched): %ld Schimbate: %ld Atentionari (warnings): %ld"
@@ -3150,17 +3081,16 @@ ER_UPDATE_INFO
swe "Rader: %ld Uppdaterade: %ld Varningar: %ld"
ukr "Записів відповідає: %ld Змінено: %ld Застережень: %ld"
ER_CANT_CREATE_THREAD
- cze "Nemohu vytvo-Břit nový thread (errno %M). Pokud je ještě nějaká volná paměť, podívejte se do manuálu na část o chybách specifických pro jednotlivé operační systémy"
+ cze "Nemohu vytvořit nový thread (errno %M). Pokud je ještě nějaká volná paměť, podívejte se do manuálu na část o chybách specifických pro jednotlivé operační systémy"
dan "Kan ikke danne en ny tråd (fejl nr. %M). Hvis computeren ikke er løbet tør for hukommelse, kan du se i brugervejledningen for en mulig operativ-system - afhængig fejl"
nla "Kan geen nieuwe thread aanmaken (Errcode: %M). Indien er geen tekort aan geheugen is kunt u de handleiding consulteren over een mogelijke OS afhankelijke fout"
eng "Can't create a new thread (errno %M); if you are not out of available memory, you can consult the manual for a possible OS-dependent bug"
- jps "新規にスレッドが作れませんでした (errno %M). もし最大使用許可メモリー数を越えていないのにエラーが発生しているなら, マニュアルの中から 'possible OS-dependent bug' という文字を探してくみてださい.",
est "Ei suuda luua uut lõime (veakood %M). Kui mälu ei ole otsas, on tõenäoliselt tegemist operatsioonisüsteemispetsiifilise veaga"
fre "Impossible de créer une nouvelle tâche (errno %M). S'il reste de la mémoire libre, consultez le manual pour trouver un éventuel bug dépendant de l'OS"
ger "Kann keinen neuen Thread erzeugen (Fehler: %M). Sollte noch Speicher verfügbar sein, bitte im Handbuch wegen möglicher Fehler im Betriebssystem nachschlagen"
hun "Uj thread letrehozasa nem lehetseges (Hibakod: %M). Amenyiben van meg szabad memoria, olvassa el a kezikonyv operacios rendszerfuggo hibalehetosegekrol szolo reszet"
ita "Impossibile creare un nuovo thread (errno %M). Se non ci sono problemi di memoria disponibile puoi consultare il manuale per controllare possibili problemi dipendenti dal SO"
- jpn "新規にスレッドが作れませんでした (errno %M). もし最大使用許可メモリー数を越えていないのにエラーが発生しているなら, マニュアルの中から 'possible OS-dependent bug' という文字を探してくみてださい."
+ jpn "新規にスレッドを作成できません。(エラー番号 %M) もしも使用可能メモリーの不足でなければ、OS依存のバグである可能性があります。"
kor "새로운 쓰레드를 만들 수 없습니다.(에러번호 %M). 만약 여유메모리가 있다면 OS-dependent버그 의 메뉴얼 부분을 찾아보시오."
nor "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
norwegian-ny "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
@@ -3173,7 +3103,7 @@ ER_CANT_CREATE_THREAD
swe "Kan inte skapa en ny tråd (errno %M)"
ukr "Не можу створити нову гілку (помилка %M). Якщо ви не використали усю пам'ять, то прочитайте документацію до вашої ОС - можливо це помилка ОС"
ER_WRONG_VALUE_COUNT_ON_ROW 21S01
- cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %lu"
+ cze "Počet sloupců neodpovídá počtu hodnot na řádku %lu"
dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %lu"
nla "Kolom aantal komt niet overeen met waarde aantal in rij %lu"
eng "Column count doesn't match value count at row %lu"
@@ -3181,6 +3111,7 @@ ER_WRONG_VALUE_COUNT_ON_ROW 21S01
ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %lu überein"
hun "Az oszlopban talalhato ertek nem egyezik meg a %lu sorban szamitott ertekkel"
ita "Il numero delle colonne non corrisponde al conteggio alla riga %lu"
+ jpn "%lu 行目で、列の数が値の数と一致しません。"
kor "Row %lu에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
por "Contagem de colunas não confere com a contagem de valores na linha %lu"
rum "Numarul de coloane nu corespunde cu numarul de valori la linia %lu"
@@ -3190,7 +3121,7 @@ ER_WRONG_VALUE_COUNT_ON_ROW 21S01
swe "Antalet kolumner motsvarar inte antalet värden på rad: %lu"
ukr "Кількість стовбців не співпадає з кількістю значень у строці %lu"
ER_CANT_REOPEN_TABLE
- cze "Nemohu znovuotev-Břít tabulku: '%-.192s"
+ cze "Nemohu znovuotevřít tabulku: '%-.192s"
dan "Kan ikke genåbne tabel '%-.192s"
nla "Kan tabel niet opnieuw openen: '%-.192s"
eng "Can't reopen table: '%-.192s'"
@@ -3199,6 +3130,7 @@ ER_CANT_REOPEN_TABLE
ger "Kann Tabelle'%-.192s' nicht erneut öffnen"
hun "Nem lehet ujra-megnyitni a tablat: '%-.192s"
ita "Impossibile riaprire la tabella: '%-.192s'"
+ jpn "表を再オープンできません。: '%-.192s'"
kor "테이블을 다시 열수 없군요: '%-.192s"
nor "Can't reopen table: '%-.192s"
norwegian-ny "Can't reopen table: '%-.192s"
@@ -3212,17 +3144,16 @@ ER_CANT_REOPEN_TABLE
swe "Kunde inte stänga och öppna tabell '%-.192s"
ukr "Не можу перевідкрити таблицю: '%-.192s'"
ER_INVALID_USE_OF_NULL 22004
- cze "Neplatn-Bé užití hodnoty NULL"
+ cze "Neplatné užití hodnoty NULL"
dan "Forkert brug af nulværdi (NULL)"
nla "Foutief gebruik van de NULL waarde"
eng "Invalid use of NULL value"
- jps "NULL 値の使用方法が不適切です",
est "NULL väärtuse väärkasutus"
fre "Utilisation incorrecte de la valeur NULL"
ger "Unerlaubte Verwendung eines NULL-Werts"
hun "A NULL ervenytelen hasznalata"
ita "Uso scorretto del valore NULL"
- jpn "NULL 値の使用方法が不適切です"
+ jpn "NULL 値の使用方法が不適切です。"
kor "NULL 값을 잘못 사용하셨군요..."
por "Uso inválido do valor NULL"
rum "Folosirea unei value NULL e invalida"
@@ -3232,7 +3163,7 @@ ER_INVALID_USE_OF_NULL 22004
swe "Felaktig använding av NULL"
ukr "Хибне використання значення NULL"
ER_REGEXP_ERROR 42000
- cze "Regul-Bární výraz vrátil chybu '%-.64s'"
+ cze "Regulární výraz vrátil chybu '%-.64s'"
dan "Fik fejl '%-.64s' fra regexp"
nla "Fout '%-.64s' ontvangen van regexp"
eng "Got error '%-.64s' from regexp"
@@ -3241,6 +3172,7 @@ ER_REGEXP_ERROR 42000
ger "regexp lieferte Fehler '%-.64s'"
hun "'%-.64s' hiba a regularis kifejezes hasznalata soran (regexp)"
ita "Errore '%-.64s' da regexp"
+ jpn "regexp がエラー '%-.64s' を返しました。"
kor "regexp에서 '%-.64s'가 났습니다."
por "Obteve erro '%-.64s' em regexp"
rum "Eroarea '%-.64s' obtinuta din expresia regulara (regexp)"
@@ -3250,7 +3182,7 @@ ER_REGEXP_ERROR 42000
swe "Fick fel '%-.64s' från REGEXP"
ukr "Отримано помилку '%-.64s' від регулярного виразу"
ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
- cze "Pokud nen-Bí žádná GROUP BY klauzule, není dovoleno současné použití GROUP položek (MIN(),MAX(),COUNT()...) s ne GROUP položkami"
+ cze "Pokud není žádná GROUP BY klauzule, není dovoleno současné použití GROUP položek (MIN(),MAX(),COUNT()...) s ne GROUP položkami"
dan "Sammenblanding af GROUP kolonner (MIN(),MAX(),COUNT()...) uden GROUP kolonner er ikke tilladt, hvis der ikke er noget GROUP BY prædikat"
nla "Het mixen van GROUP kolommen (MIN(),MAX(),COUNT()...) met no-GROUP kolommen is foutief indien er geen GROUP BY clausule is"
eng "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal if there is no GROUP BY clause"
@@ -3259,6 +3191,7 @@ ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
ger "Das Vermischen von GROUP-Feldern (MIN(),MAX(),COUNT()...) mit Nicht-GROUP-Feldern ist nicht zulässig, wenn keine GROUP-BY-Klausel vorhanden ist"
hun "A GROUP mezok (MIN(),MAX(),COUNT()...) kevert hasznalata nem lehetseges GROUP BY hivatkozas nelkul"
ita "Il mescolare funzioni di aggregazione (MIN(),MAX(),COUNT()...) e non e` illegale se non c'e` una clausula GROUP BY"
+ jpn "GROUP BY句が無い場合、集計関数(MIN(),MAX(),COUNT(),...)と通常の列を同時に使用できません。"
kor "Mixing of GROUP 칼럼s (MIN(),MAX(),COUNT(),...) with no GROUP 칼럼s is illegal if there is no GROUP BY clause"
por "Mistura de colunas agrupadas (com MIN(), MAX(), COUNT(), ...) com colunas não agrupadas é ilegal, se não existir uma cláusula de agrupamento (cláusula GROUP BY)"
rum "Amestecarea de coloane GROUP (MIN(),MAX(),COUNT()...) fara coloane GROUP este ilegala daca nu exista o clauza GROUP BY"
@@ -3268,17 +3201,16 @@ ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
swe "Man får ha både GROUP-kolumner (MIN(),MAX(),COUNT()...) och fält i en fråga om man inte har en GROUP BY-del"
ukr "Змішування GROUP стовбців (MIN(),MAX(),COUNT()...) з не GROUP стовбцями є забороненим, якщо не має GROUP BY"
ER_NONEXISTING_GRANT 42000
- cze "Neexistuje odpov-Bídající grant pro uživatele '%-.48s' na stroji '%-.64s'"
+ cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s'"
dan "Denne tilladelse findes ikke for brugeren '%-.48s' på vært '%-.64s'"
nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s'"
eng "There is no such grant defined for user '%-.48s' on host '%-.64s'"
- jps "ユーザー '%-.48s' (ホスト '%-.64s' のユーザー) は許可されていません",
est "Sellist õigust ei ole defineeritud kasutajale '%-.48s' masinast '%-.64s'"
fre "Un tel droit n'est pas défini pour l'utilisateur '%-.48s' sur l'hôte '%-.64s'"
ger "Für Benutzer '%-.48s' auf Host '%-.64s' gibt es keine solche Berechtigung"
hun "A '%-.48s' felhasznalonak nincs ilyen joga a '%-.64s' host-on"
ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s'"
- jpn "ユーザー '%-.48s' (ホスト '%-.64s' のユーザー) は許可されていません"
+ jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) は許可されていません。"
kor "사용자 '%-.48s' (호스트 '%-.64s')를 위하여 정의된 그런 승인은 없습니다."
por "Não existe tal permissão (grant) definida para o usuário '%-.48s' no 'host' '%-.64s'"
rum "Nu exista un astfel de grant definit pentru utilzatorul '%-.48s' de pe host-ul '%-.64s'"
@@ -3288,17 +3220,16 @@ ER_NONEXISTING_GRANT 42000
swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s'"
ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s'"
ER_TABLEACCESS_DENIED_ERROR 42000
- cze "%-.128s p-Bříkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro tabulku '%-.192s'"
+ cze "%-.128s příkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro tabulku '%-.192s'"
dan "%-.128s-kommandoen er ikke tilladt for brugeren '%-.48s'@'%-.64s' for tabellen '%-.192s'"
nla "%-.128s commando geweigerd voor gebruiker: '%-.48s'@'%-.64s' voor tabel '%-.192s'"
eng "%-.128s command denied to user '%-.48s'@'%-.64s' for table '%-.192s'"
- jps "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' ,テーブル '%-.192s' に対して許可されていません",
est "%-.128s käsk ei ole lubatud kasutajale '%-.48s'@'%-.64s' tabelis '%-.192s'"
fre "La commande '%-.128s' est interdite à l'utilisateur: '%-.48s'@'%-.64s' sur la table '%-.192s'"
ger "%-.128s Befehl nicht erlaubt für Benutzer '%-.48s'@'%-.64s' auf Tabelle '%-.192s'"
hun "%-.128s parancs a '%-.48s'@'%-.64s' felhasznalo szamara nem engedelyezett a '%-.192s' tablaban"
ita "Comando %-.128s negato per l'utente: '%-.48s'@'%-.64s' sulla tabella '%-.192s'"
- jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' ,テーブル '%-.192s' に対して許可されていません"
+ jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' の表 '%-.192s' の使用に関して許可されていません。"
kor "'%-.128s' 명령은 다음 사용자에게 거부되었습니다. : '%-.48s'@'%-.64s' for 테이블 '%-.192s'"
por "Comando '%-.128s' negado para o usuário '%-.48s'@'%-.64s' na tabela '%-.192s'"
rum "Comanda %-.128s interzisa utilizatorului: '%-.48s'@'%-.64s' pentru tabela '%-.192s'"
@@ -3308,17 +3239,16 @@ ER_TABLEACCESS_DENIED_ERROR 42000
swe "%-.128s ej tillåtet för '%-.48s'@'%-.64s' för tabell '%-.192s'"
ukr "%-.128s команда заборонена користувачу: '%-.48s'@'%-.64s' у таблиці '%-.192s'"
ER_COLUMNACCESS_DENIED_ERROR 42000
- cze "%-.128s p-Bříkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro sloupec '%-.192s' v tabulce '%-.192s'"
+ cze "%-.128s příkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro sloupec '%-.192s' v tabulce '%-.192s'"
dan "%-.128s-kommandoen er ikke tilladt for brugeren '%-.48s'@'%-.64s' for kolonne '%-.192s' in tabellen '%-.192s'"
nla "%-.128s commando geweigerd voor gebruiker: '%-.48s'@'%-.64s' voor kolom '%-.192s' in tabel '%-.192s'"
eng "%-.128s command denied to user '%-.48s'@'%-.64s' for column '%-.192s' in table '%-.192s'"
- jps "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'¥n カラム '%-.192s' テーブル '%-.192s' に対して許可されていません",
est "%-.128s käsk ei ole lubatud kasutajale '%-.48s'@'%-.64s' tulbale '%-.192s' tabelis '%-.192s'"
fre "La commande '%-.128s' est interdite à l'utilisateur: '%-.48s'@'%-.64s' sur la colonne '%-.192s' de la table '%-.192s'"
ger "%-.128s Befehl nicht erlaubt für Benutzer '%-.48s'@'%-.64s' und Feld '%-.192s' in Tabelle '%-.192s'"
hun "%-.128s parancs a '%-.48s'@'%-.64s' felhasznalo szamara nem engedelyezett a '%-.192s' mezo eseten a '%-.192s' tablaban"
ita "Comando %-.128s negato per l'utente: '%-.48s'@'%-.64s' sulla colonna '%-.192s' della tabella '%-.192s'"
- jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'\n カラム '%-.192s' テーブル '%-.192s' に対して許可されていません"
+ jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'\n の列 '%-.192s'(表 '%-.192s') の利用に関して許可されていません。"
kor "'%-.128s' 명령은 다음 사용자에게 거부되었습니다. : '%-.48s'@'%-.64s' for 칼럼 '%-.192s' in 테이블 '%-.192s'"
por "Comando '%-.128s' negado para o usuário '%-.48s'@'%-.64s' na coluna '%-.192s', na tabela '%-.192s'"
rum "Comanda %-.128s interzisa utilizatorului: '%-.48s'@'%-.64s' pentru coloana '%-.192s' in tabela '%-.192s'"
@@ -3328,7 +3258,7 @@ ER_COLUMNACCESS_DENIED_ERROR 42000
swe "%-.128s ej tillåtet för '%-.48s'@'%-.64s' för kolumn '%-.192s' i tabell '%-.192s'"
ukr "%-.128s команда заборонена користувачу: '%-.48s'@'%-.64s' для стовбця '%-.192s' у таблиці '%-.192s'"
ER_ILLEGAL_GRANT_FOR_TABLE 42000
- cze "Neplatn-Bý příkaz GRANT/REVOKE. Prosím, přečtěte si v manuálu, jaká privilegia je možné použít."
+ cze "Neplatný příkaz GRANT/REVOKE. Prosím, přečtěte si v manuálu, jaká privilegia je možné použít."
dan "Forkert GRANT/REVOKE kommando. Se i brugervejledningen hvilke privilegier der kan specificeres."
nla "Foutief GRANT/REVOKE commando. Raadpleeg de handleiding welke priveleges gebruikt kunnen worden."
eng "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
@@ -3338,7 +3268,7 @@ ER_ILLEGAL_GRANT_FOR_TABLE 42000
greek "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
hun "Ervenytelen GRANT/REVOKE parancs. Kerem, nezze meg a kezikonyvben, milyen jogok lehetsegesek"
ita "Comando GRANT/REVOKE illegale. Prego consultare il manuale per sapere quali privilegi possono essere usati."
- jpn "Illegal GRANT/REVOKE command; please consult the manual to see which privleges can be used."
+ jpn "不正な GRANT/REVOKE コマンドです。どの権限で利用可能かはマニュアルを参照して下さい。"
kor "잘못된 GRANT/REVOKE 명령. 어떤 권리와 승인이 사용되어 질 수 있는지 메뉴얼을 보시오."
nor "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
norwegian-ny "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
@@ -3352,7 +3282,7 @@ ER_ILLEGAL_GRANT_FOR_TABLE 42000
swe "Felaktigt GRANT-privilegium använt"
ukr "Хибна GRANT/REVOKE команда; прочитайте документацію стосовно того, які права можна використовувати"
ER_GRANT_WRONG_HOST_OR_USER 42000
- cze "Argument p-Bříkazu GRANT uživatel nebo stroj je příliš dlouhý"
+ cze "Argument příkazu GRANT uživatel nebo stroj je příliš dlouhý"
dan "Værts- eller brugernavn for langt til GRANT"
nla "De host of gebruiker parameter voor GRANT is te lang"
eng "The host or user argument to GRANT is too long"
@@ -3361,6 +3291,7 @@ ER_GRANT_WRONG_HOST_OR_USER 42000
ger "Das Host- oder User-Argument für GRANT ist zu lang"
hun "A host vagy felhasznalo argumentuma tul hosszu a GRANT parancsban"
ita "L'argomento host o utente per la GRANT e` troppo lungo"
+ jpn "GRANTコマンドへの、ホスト名やユーザー名が長すぎます。"
kor "승인(GRANT)을 위하여 사용한 사용자나 호스트의 값들이 너무 깁니다."
por "Argumento de 'host' ou de usuário para o GRANT é longo demais"
rum "Argumentul host-ului sau utilizatorului pentru GRANT e prea lung"
@@ -3379,7 +3310,7 @@ ER_NO_SUCH_TABLE 42S02
ger "Tabelle '%-.192s.%-.192s' existiert nicht"
hun "A '%-.192s.%-.192s' tabla nem letezik"
ita "La tabella '%-.192s.%-.192s' non esiste"
- jpn "Table '%-.192s.%-.192s' doesn't exist"
+ jpn "表 '%-.192s.%-.192s' は存在しません。"
kor "테이블 '%-.192s.%-.192s' 는 존재하지 않습니다."
nor "Table '%-.192s.%-.192s' doesn't exist"
norwegian-ny "Table '%-.192s.%-.192s' doesn't exist"
@@ -3393,7 +3324,7 @@ ER_NO_SUCH_TABLE 42S02
swe "Det finns ingen tabell som heter '%-.192s.%-.192s'"
ukr "Таблиця '%-.192s.%-.192s' не існує"
ER_NONEXISTING_TABLE_GRANT 42000
- cze "Neexistuje odpov-Bídající grant pro uživatele '%-.48s' na stroji '%-.64s' pro tabulku '%-.192s'"
+ cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s' pro tabulku '%-.192s'"
dan "Denne tilladelse eksisterer ikke for brugeren '%-.48s' på vært '%-.64s' for tabellen '%-.192s'"
nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s' op tabel '%-.192s'"
eng "There is no such grant defined for user '%-.48s' on host '%-.64s' on table '%-.192s'"
@@ -3402,6 +3333,7 @@ ER_NONEXISTING_TABLE_GRANT 42000
ger "Eine solche Berechtigung ist für User '%-.48s' auf Host '%-.64s' an Tabelle '%-.192s' nicht definiert"
hun "A '%-.48s' felhasznalo szamara a '%-.64s' host '%-.192s' tablajaban ez a parancs nem engedelyezett"
ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s' sulla tabella '%-.192s'"
+ jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) の表 '%-.192s' への権限は定義されていません。"
kor "사용자 '%-.48s'(호스트 '%-.64s')는 테이블 '%-.192s'를 사용하기 위하여 정의된 승인은 없습니다. "
por "Não existe tal permissão (grant) definido para o usuário '%-.48s' no 'host' '%-.64s', na tabela '%-.192s'"
rum "Nu exista un astfel de privilegiu (grant) definit pentru utilizatorul '%-.48s' de pe host-ul '%-.64s' pentru tabela '%-.192s'"
@@ -3411,7 +3343,7 @@ ER_NONEXISTING_TABLE_GRANT 42000
swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s' för tabell '%-.192s'"
ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s' для таблиці '%-.192s'"
ER_NOT_ALLOWED_COMMAND 42000
- cze "Pou-Bžitý příkaz není v této verzi MariaDB povolen"
+ cze "Použitý příkaz není v této verzi MariaDB povolen"
dan "Den brugte kommando er ikke tilladt med denne udgave af MariaDB"
nla "Het used commando is niet toegestaan in deze MariaDB versie"
eng "The used command is not allowed with this MariaDB version"
@@ -3420,6 +3352,7 @@ ER_NOT_ALLOWED_COMMAND 42000
ger "Der verwendete Befehl ist in dieser MariaDB-Version nicht zulässig"
hun "A hasznalt parancs nem engedelyezett ebben a MariaDB verzioban"
ita "Il comando utilizzato non e` supportato in questa versione di MariaDB"
+ jpn "このMariaDBバージョンでは利用できないコマンドです。"
kor "사용된 명령은 현재의 MariaDB 버젼에서는 이용되지 않습니다."
por "Comando usado não é permitido para esta versão do MariaDB"
rum "Comanda folosita nu este permisa pentru aceasta versiune de MariaDB"
@@ -3429,7 +3362,7 @@ ER_NOT_ALLOWED_COMMAND 42000
swe "Du kan inte använda detta kommando med denna MariaDB version"
ukr "Використовувана команда не дозволена у цій версії MariaDB"
ER_SYNTAX_ERROR 42000
- cze "Va-Bše syntaxe je nějaká divná"
+ cze "Vaše syntaxe je nějaká divná"
dan "Der er en fejl i SQL syntaksen"
nla "Er is iets fout in de gebruikte syntax"
eng "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use"
@@ -3439,7 +3372,7 @@ ER_SYNTAX_ERROR 42000
greek "You have an error in your SQL syntax"
hun "Szintaktikai hiba"
ita "Errore di sintassi nella query SQL"
- jpn "Something is wrong in your syntax"
+ jpn "SQL構文エラーです。バージョンに対応するマニュアルを参照して正しい構文を確認してください。"
kor "SQL 구문에 오류가 있습니다."
nor "Something is wrong in your syntax"
norwegian-ny "Something is wrong in your syntax"
@@ -3453,7 +3386,7 @@ ER_SYNTAX_ERROR 42000
swe "Du har något fel i din syntax"
ukr "У вас помилка у синтаксисі SQL"
ER_DELAYED_CANT_CHANGE_LOCK
- cze "Zpo-Bžděný insert threadu nebyl schopen získat požadovaný zámek pro tabulku %-.192s"
+ cze "Zpožděný insert threadu nebyl schopen získat požadovaný zámek pro tabulku %-.192s"
dan "Forsinket indsættelse tråden (delayed insert thread) kunne ikke opnå lås på tabellen %-.192s"
nla "'Delayed insert' thread kon de aangevraagde 'lock' niet krijgen voor tabel %-.192s"
eng "Delayed insert thread couldn't get requested lock for table %-.192s"
@@ -3462,6 +3395,7 @@ ER_DELAYED_CANT_CHANGE_LOCK
ger "Verzögerter (DELAYED) Einfüge-Thread konnte die angeforderte Sperre für Tabelle '%-.192s' nicht erhalten"
hun "A kesleltetett beillesztes (delayed insert) thread nem kapott zatolast a %-.192s tablahoz"
ita "Il thread di inserimento ritardato non riesce ad ottenere il lock per la tabella %-.192s"
+ jpn "'Delayed insert'スレッドが表 '%-.192s' のロックを取得できませんでした。"
kor "지연된 insert 쓰레드가 테이블 %-.192s의 요구된 락킹을 처리할 수 없었습니다."
por "'Thread' de inserção retardada (atrasada) pois não conseguiu obter a trava solicitada para tabela '%-.192s'"
rum "Thread-ul pentru inserarea aminata nu a putut obtine lacatul (lock) pentru tabela %-.192s"
@@ -3471,7 +3405,7 @@ ER_DELAYED_CANT_CHANGE_LOCK
swe "DELAYED INSERT-tråden kunde inte låsa tabell '%-.192s'"
ukr "Гілка для INSERT DELAYED не може отримати блокування для таблиці %-.192s"
ER_TOO_MANY_DELAYED_THREADS
- cze "P-Bříliš mnoho zpožděných threadů"
+ cze "Příliš mnoho zpožděných threadů"
dan "For mange slettede tråde (threads) i brug"
nla "Te veel 'delayed' threads in gebruik"
eng "Too many delayed threads in use"
@@ -3480,6 +3414,7 @@ ER_TOO_MANY_DELAYED_THREADS
ger "Zu viele verzögerte (DELAYED) Threads in Verwendung"
hun "Tul sok kesletetett thread (delayed)"
ita "Troppi threads ritardati in uso"
+ jpn "'Delayed insert'スレッドが多すぎます。"
kor "너무 많은 지연 쓰레드를 사용하고 있습니다."
por "Excesso de 'threads' retardadas (atrasadas) em uso"
rum "Prea multe threaduri aminate care sint in uz"
@@ -3489,7 +3424,7 @@ ER_TOO_MANY_DELAYED_THREADS
swe "Det finns redan 'max_delayed_threads' trådar i använding"
ukr "Забагато затриманих гілок використовується"
ER_ABORTING_CONNECTION 08S01
- cze "Zru-Bšeno spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' (%-.64s)"
+ cze "Zrušeno spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' (%-.64s)"
dan "Afbrudt forbindelse %ld til database: '%-.192s' bruger: '%-.48s' (%-.64s)"
nla "Afgebroken verbinding %ld naar db: '%-.192s' gebruiker: '%-.48s' (%-.64s)"
eng "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
@@ -3498,7 +3433,7 @@ ER_ABORTING_CONNECTION 08S01
ger "Abbruch der Verbindung %ld zur Datenbank '%-.192s'. Benutzer: '%-.48s' (%-.64s)"
hun "Megszakitott kapcsolat %ld db: '%-.192s' adatbazishoz, felhasznalo: '%-.48s' (%-.64s)"
ita "Interrotta la connessione %ld al db: '%-.192s' utente: '%-.48s' (%-.64s)"
- jpn "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+ jpn "接続 %ld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' (%-.64s)"
kor "데이타베이스 접속을 위한 연결 %ld가 중단됨 : '%-.192s' 사용자: '%-.48s' (%-.64s)"
nor "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
norwegian-ny "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
@@ -3512,7 +3447,7 @@ ER_ABORTING_CONNECTION 08S01
swe "Avbröt länken för tråd %ld till db '%-.192s', användare '%-.48s' (%-.64s)"
ukr "Перервано з'єднання %ld до бази данних: '%-.192s' користувача: '%-.48s' (%-.64s)"
ER_NET_PACKET_TOO_LARGE 08S01
- cze "Zji-Bštěn příchozí packet delší než 'max_allowed_packet'"
+ cze "Zjištěn příchozí packet delší než 'max_allowed_packet'"
dan "Modtog en datapakke som var større end 'max_allowed_packet'"
nla "Groter pakket ontvangen dan 'max_allowed_packet'"
eng "Got a packet bigger than 'max_allowed_packet' bytes"
@@ -3521,6 +3456,7 @@ ER_NET_PACKET_TOO_LARGE 08S01
ger "Empfangenes Paket ist größer als 'max_allowed_packet' Bytes"
hun "A kapott csomag nagyobb, mint a maximalisan engedelyezett: 'max_allowed_packet'"
ita "Ricevuto un pacchetto piu` grande di 'max_allowed_packet'"
+ jpn "'max_allowed_packet'よりも大きなパケットを受信しました。"
kor "'max_allowed_packet'보다 더큰 패킷을 받았습니다."
por "Obteve um pacote maior do que a taxa máxima de pacotes definida (max_allowed_packet)"
rum "Un packet mai mare decit 'max_allowed_packet' a fost primit"
@@ -3530,7 +3466,7 @@ ER_NET_PACKET_TOO_LARGE 08S01
swe "Kommunkationspaketet är större än 'max_allowed_packet'"
ukr "Отримано пакет більший ніж max_allowed_packet"
ER_NET_READ_ERROR_FROM_PIPE 08S01
- cze "Zji-Bštěna chyba při čtení z roury spojení"
+ cze "Zjištěna chyba při čtení z roury spojení"
dan "Fik læsefejl fra forbindelse (connection pipe)"
nla "Kreeg leesfout van de verbindings pipe"
eng "Got a read error from the connection pipe"
@@ -3539,6 +3475,7 @@ ER_NET_READ_ERROR_FROM_PIPE 08S01
ger "Lese-Fehler bei einer Verbindungs-Pipe"
hun "Olvasasi hiba a kapcsolat soran"
ita "Rilevato un errore di lettura dalla pipe di connessione"
+ jpn "接続パイプの読み込みエラーです。"
kor "연결 파이프로부터 에러가 발생하였습니다."
por "Obteve um erro de leitura no 'pipe' da conexão"
rum "Eroare la citire din cauza lui 'connection pipe'"
@@ -3548,7 +3485,7 @@ ER_NET_READ_ERROR_FROM_PIPE 08S01
swe "Fick läsfel från klienten vid läsning från 'PIPE'"
ukr "Отримано помилку читання з комунікаційного каналу"
ER_NET_FCNTL_ERROR 08S01
- cze "Zji-Bštěna chyba fcntl()"
+ cze "Zjištěna chyba fcntl()"
dan "Fik fejlmeddelelse fra fcntl()"
nla "Kreeg fout van fcntl()"
eng "Got an error from fcntl()"
@@ -3557,6 +3494,7 @@ ER_NET_FCNTL_ERROR 08S01
ger "fcntl() lieferte einen Fehler"
hun "Hiba a fcntl() fuggvenyben"
ita "Rilevato un errore da fcntl()"
+ jpn "fcntl()がエラーを返しました。"
kor "fcntl() 함수로부터 에러가 발생하였습니다."
por "Obteve um erro em fcntl()"
rum "Eroare obtinuta de la fcntl()"
@@ -3566,7 +3504,7 @@ ER_NET_FCNTL_ERROR 08S01
swe "Fick fatalt fel från 'fcntl()'"
ukr "Отримано помилкку від fcntl()"
ER_NET_PACKETS_OUT_OF_ORDER 08S01
- cze "P-Bříchozí packety v chybném pořadí"
+ cze "Příchozí packety v chybném pořadí"
dan "Modtog ikke datapakker i korrekt rækkefølge"
nla "Pakketten in verkeerde volgorde ontvangen"
eng "Got packets out of order"
@@ -3575,6 +3513,7 @@ ER_NET_PACKETS_OUT_OF_ORDER 08S01
ger "Pakete nicht in der richtigen Reihenfolge empfangen"
hun "Helytelen sorrendben erkezett adatcsomagok"
ita "Ricevuti pacchetti non in ordine"
+ jpn "不正な順序のパケットを受信しました。"
kor "순서가 맞지않는 패킷을 받았습니다."
por "Obteve pacotes fora de ordem"
rum "Packets care nu sint ordonati au fost gasiti"
@@ -3584,7 +3523,7 @@ ER_NET_PACKETS_OUT_OF_ORDER 08S01
swe "Kommunikationspaketen kom i fel ordning"
ukr "Отримано пакети у неналежному порядку"
ER_NET_UNCOMPRESS_ERROR 08S01
- cze "Nemohu rozkomprimovat komunika-Bční packet"
+ cze "Nemohu rozkomprimovat komunikační packet"
dan "Kunne ikke dekomprimere kommunikations-pakke (communication packet)"
nla "Communicatiepakket kon niet worden gedecomprimeerd"
eng "Couldn't uncompress communication packet"
@@ -3593,6 +3532,7 @@ ER_NET_UNCOMPRESS_ERROR 08S01
ger "Kommunikationspaket lässt sich nicht entpacken"
hun "A kommunikacios adatcsomagok nem tomorithetok ki"
ita "Impossibile scompattare i pacchetti di comunicazione"
+ jpn "圧縮パケットの展開ができませんでした。"
kor "통신 패킷의 압축해제를 할 수 없었습니다."
por "Não conseguiu descomprimir pacote de comunicação"
rum "Nu s-a putut decompresa pachetul de comunicatie (communication packet)"
@@ -3602,7 +3542,7 @@ ER_NET_UNCOMPRESS_ERROR 08S01
swe "Kunde inte packa up kommunikationspaketet"
ukr "Не можу декомпресувати комунікаційний пакет"
ER_NET_READ_ERROR 08S01
- cze "Zji-Bštěna chyba při čtení komunikačního packetu"
+ cze "Zjištěna chyba při čtení komunikačního packetu"
dan "Fik fejlmeddelelse ved læsning af kommunikations-pakker (communication packets)"
nla "Fout bij het lezen van communicatiepakketten"
eng "Got an error reading communication packets"
@@ -3611,6 +3551,7 @@ ER_NET_READ_ERROR 08S01
ger "Fehler beim Lesen eines Kommunikationspakets"
hun "HIba a kommunikacios adatcsomagok olvasasa soran"
ita "Rilevato un errore ricevendo i pacchetti di comunicazione"
+ jpn "パケットの受信でエラーが発生しました。"
kor "통신 패킷을 읽는 중 오류가 발생하였습니다."
por "Obteve um erro na leitura de pacotes de comunicação"
rum "Eroare obtinuta citind pachetele de comunicatie (communication packets)"
@@ -3620,7 +3561,7 @@ ER_NET_READ_ERROR 08S01
swe "Fick ett fel vid läsning från klienten"
ukr "Отримано помилку читання комунікаційних пакетів"
ER_NET_READ_INTERRUPTED 08S01
- cze "Zji-Bštěn timeout při čtení komunikačního packetu"
+ cze "Zjištěn timeout při čtení komunikačního packetu"
dan "Timeout-fejl ved læsning af kommunukations-pakker (communication packets)"
nla "Timeout bij het lezen van communicatiepakketten"
eng "Got timeout reading communication packets"
@@ -3629,6 +3570,7 @@ ER_NET_READ_INTERRUPTED 08S01
ger "Zeitüberschreitung beim Lesen eines Kommunikationspakets"
hun "Idotullepes a kommunikacios adatcsomagok olvasasa soran"
ita "Rilevato un timeout ricevendo i pacchetti di comunicazione"
+ jpn "パケットの受信でタイムアウトが発生しました。"
kor "통신 패킷을 읽는 중 timeout이 발생하였습니다."
por "Obteve expiração de tempo (timeout) na leitura de pacotes de comunicação"
rum "Timeout obtinut citind pachetele de comunicatie (communication packets)"
@@ -3638,7 +3580,7 @@ ER_NET_READ_INTERRUPTED 08S01
swe "Fick 'timeout' vid läsning från klienten"
ukr "Отримано затримку читання комунікаційних пакетів"
ER_NET_ERROR_ON_WRITE 08S01
- cze "Zji-Bštěna chyba při zápisu komunikačního packetu"
+ cze "Zjištěna chyba při zápisu komunikačního packetu"
dan "Fik fejlmeddelelse ved skrivning af kommunukations-pakker (communication packets)"
nla "Fout bij het schrijven van communicatiepakketten"
eng "Got an error writing communication packets"
@@ -3647,6 +3589,7 @@ ER_NET_ERROR_ON_WRITE 08S01
ger "Fehler beim Schreiben eines Kommunikationspakets"
hun "Hiba a kommunikacios csomagok irasa soran"
ita "Rilevato un errore inviando i pacchetti di comunicazione"
+ jpn "パケットの送信でエラーが発生しました。"
kor "통신 패킷을 기록하는 중 오류가 발생하였습니다."
por "Obteve um erro na escrita de pacotes de comunicação"
rum "Eroare in scrierea pachetelor de comunicatie (communication packets)"
@@ -3656,7 +3599,7 @@ ER_NET_ERROR_ON_WRITE 08S01
swe "Fick ett fel vid skrivning till klienten"
ukr "Отримано помилку запису комунікаційних пакетів"
ER_NET_WRITE_INTERRUPTED 08S01
- cze "Zji-Bštěn timeout při zápisu komunikačního packetu"
+ cze "Zjištěn timeout při zápisu komunikačního packetu"
dan "Timeout-fejl ved skrivning af kommunukations-pakker (communication packets)"
nla "Timeout bij het schrijven van communicatiepakketten"
eng "Got timeout writing communication packets"
@@ -3665,6 +3608,7 @@ ER_NET_WRITE_INTERRUPTED 08S01
ger "Zeitüberschreitung beim Schreiben eines Kommunikationspakets"
hun "Idotullepes a kommunikacios csomagok irasa soran"
ita "Rilevato un timeout inviando i pacchetti di comunicazione"
+ jpn "パケットの送信でタイムアウトが発生しました。"
kor "통신 패팃을 기록하는 중 timeout이 발생하였습니다."
por "Obteve expiração de tempo ('timeout') na escrita de pacotes de comunicação"
rum "Timeout obtinut scriind pachetele de comunicatie (communication packets)"
@@ -3674,7 +3618,7 @@ ER_NET_WRITE_INTERRUPTED 08S01
swe "Fick 'timeout' vid skrivning till klienten"
ukr "Отримано затримку запису комунікаційних пакетів"
ER_TOO_LONG_STRING 42000
- cze "V-Býsledný řetězec je delší než 'max_allowed_packet'"
+ cze "Výsledný řetězec je delší než 'max_allowed_packet'"
dan "Strengen med resultater er større end 'max_allowed_packet'"
nla "Resultaat string is langer dan 'max_allowed_packet'"
eng "Result string is longer than 'max_allowed_packet' bytes"
@@ -3683,6 +3627,7 @@ ER_TOO_LONG_STRING 42000
ger "Ergebnis-String ist länger als 'max_allowed_packet' Bytes"
hun "Ez eredmeny sztring nagyobb, mint a lehetseges maximum: 'max_allowed_packet'"
ita "La stringa di risposta e` piu` lunga di 'max_allowed_packet'"
+ jpn "結果の文字列が 'max_allowed_packet' よりも大きいです。"
por "'String' resultante é mais longa do que 'max_allowed_packet'"
rum "Sirul rezultat este mai lung decit 'max_allowed_packet'"
rus "Результирующая строка больше, чем 'max_allowed_packet'"
@@ -3691,7 +3636,7 @@ ER_TOO_LONG_STRING 42000
swe "Resultatsträngen är längre än max_allowed_packet"
ukr "Строка результату довша ніж max_allowed_packet"
ER_TABLE_CANT_HANDLE_BLOB 42000
- cze "Typ pou-Bžité tabulky nepodporuje BLOB/TEXT sloupce"
+ cze "Typ použité tabulky nepodporuje BLOB/TEXT sloupce"
dan "Denne tabeltype understøtter ikke brug af BLOB og TEXT kolonner"
nla "Het gebruikte tabel type ondersteunt geen BLOB/TEXT kolommen"
eng "The used table type doesn't support BLOB/TEXT columns"
@@ -3700,6 +3645,7 @@ ER_TABLE_CANT_HANDLE_BLOB 42000
ger "Der verwendete Tabellentyp unterstützt keine BLOB- und TEXT-Felder"
hun "A hasznalt tabla tipus nem tamogatja a BLOB/TEXT mezoket"
ita "Il tipo di tabella usata non supporta colonne di tipo BLOB/TEXT"
+ jpn "指定されたストレージエンジンでは、BLOB/TEXT型の列を使用できません。"
por "Tipo de tabela usado não permite colunas BLOB/TEXT"
rum "Tipul de tabela folosit nu suporta coloane de tip BLOB/TEXT"
rus "Используемая таблица не поддерживает типы BLOB/TEXT"
@@ -3708,7 +3654,7 @@ ER_TABLE_CANT_HANDLE_BLOB 42000
swe "Den använda tabelltypen kan inte hantera BLOB/TEXT-kolumner"
ukr "Використаний тип таблиці не підтримує BLOB/TEXT стовбці"
ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
- cze "Typ pou-Bžité tabulky nepodporuje AUTO_INCREMENT sloupce"
+ cze "Typ použité tabulky nepodporuje AUTO_INCREMENT sloupce"
dan "Denne tabeltype understøtter ikke brug af AUTO_INCREMENT kolonner"
nla "Het gebruikte tabel type ondersteunt geen AUTO_INCREMENT kolommen"
eng "The used table type doesn't support AUTO_INCREMENT columns"
@@ -3716,6 +3662,7 @@ ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
fre "Ce type de table ne supporte pas les colonnes AUTO_INCREMENT"
ger "Der verwendete Tabellentyp unterstützt keine AUTO_INCREMENT-Felder"
hun "A hasznalt tabla tipus nem tamogatja az AUTO_INCREMENT tipusu mezoket"
+ jpn "指定されたストレージエンジンでは、AUTO_INCREMENT列を使用できません。"
ita "Il tipo di tabella usata non supporta colonne di tipo AUTO_INCREMENT"
por "Tipo de tabela usado não permite colunas AUTO_INCREMENT"
rum "Tipul de tabela folosit nu suporta coloane de tip AUTO_INCREMENT"
@@ -3725,7 +3672,7 @@ ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
swe "Den använda tabelltypen kan inte hantera AUTO_INCREMENT-kolumner"
ukr "Використаний тип таблиці не підтримує AUTO_INCREMENT стовбці"
ER_DELAYED_INSERT_TABLE_LOCKED
- cze "INSERT DELAYED nen-Bí možno s tabulkou '%-.192s' použít, protože je zamčená pomocí LOCK TABLES"
+ cze "INSERT DELAYED není možno s tabulkou '%-.192s' použít, protože je zamčená pomocí LOCK TABLES"
dan "INSERT DELAYED kan ikke bruges med tabellen '%-.192s', fordi tabellen er låst med LOCK TABLES"
nla "INSERT DELAYED kan niet worden gebruikt bij table '%-.192s', vanwege een 'lock met LOCK TABLES"
eng "INSERT DELAYED can't be used with table '%-.192s' because it is locked with LOCK TABLES"
@@ -3735,7 +3682,7 @@ ER_DELAYED_INSERT_TABLE_LOCKED
greek "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
hun "Az INSERT DELAYED nem hasznalhato a '%-.192s' tablahoz, mert a tabla zarolt (LOCK TABLES)"
ita "L'inserimento ritardato (INSERT DELAYED) non puo` essere usato con la tabella '%-.192s', perche` soggetta a lock da 'LOCK TABLES'"
- jpn "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+ jpn "表 '%-.192s' はLOCK TABLESでロックされているため、INSERT DELAYEDを使用できません。"
kor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
nor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
norwegian-ny "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
@@ -3749,7 +3696,7 @@ ER_DELAYED_INSERT_TABLE_LOCKED
swe "INSERT DELAYED kan inte användas med tabell '%-.192s', emedan den är låst med LOCK TABLES"
ukr "INSERT DELAYED не може бути використано з таблицею '%-.192s', тому що її заблоковано з LOCK TABLES"
ER_WRONG_COLUMN_NAME 42000
- cze "Nespr-Bávné jméno sloupce '%-.100s'"
+ cze "Nesprávné jméno sloupce '%-.100s'"
dan "Forkert kolonnenavn '%-.100s'"
nla "Incorrecte kolom naam '%-.100s'"
eng "Incorrect column name '%-.100s'"
@@ -3758,6 +3705,7 @@ ER_WRONG_COLUMN_NAME 42000
ger "Falscher Spaltenname '%-.100s'"
hun "Ervenytelen mezonev: '%-.100s'"
ita "Nome colonna '%-.100s' non corretto"
+ jpn "列名 '%-.100s' は不正です。"
por "Nome de coluna '%-.100s' incorreto"
rum "Nume increct de coloana '%-.100s'"
rus "Неверное имя столбца '%-.100s'"
@@ -3766,7 +3714,7 @@ ER_WRONG_COLUMN_NAME 42000
swe "Felaktigt kolumnnamn '%-.100s'"
ukr "Невірне ім'я стовбця '%-.100s'"
ER_WRONG_KEY_COLUMN 42000
- cze "Handler pou-Bžité tabulky neumí indexovat sloupce '%-.192s'"
+ cze "Handler použité tabulky neumí indexovat sloupce '%-.192s'"
dan "Den brugte tabeltype kan ikke indeksere kolonnen '%-.192s'"
nla "De gebruikte tabel 'handler' kan kolom '%-.192s' niet indexeren"
eng "The used storage engine can't index column '%-.192s'"
@@ -3776,7 +3724,7 @@ ER_WRONG_KEY_COLUMN 42000
greek "The used table handler can't index column '%-.192s'"
hun "A hasznalt tablakezelo nem tudja a '%-.192s' mezot indexelni"
ita "Il gestore delle tabelle non puo` indicizzare la colonna '%-.192s'"
- jpn "The used table handler can't index column '%-.192s'"
+ jpn "使用のストレージエンジンは列 '%-.192s' の索引を作成できません。"
kor "The used table handler can't index column '%-.192s'"
nor "The used table handler can't index column '%-.192s'"
norwegian-ny "The used table handler can't index column '%-.192s'"
@@ -3790,7 +3738,7 @@ ER_WRONG_KEY_COLUMN 42000
swe "Den använda tabelltypen kan inte indexera kolumn '%-.192s'"
ukr "Використаний вказівник таблиці не може індексувати стовбець '%-.192s'"
ER_WRONG_MRG_TABLE
- cze "V-Bšechny tabulky v MERGE tabulce nejsou definovány stejně"
+ cze "Všechny tabulky v MERGE tabulce nejsou definovány stejně"
dan "Tabellerne i MERGE er ikke defineret ens"
nla "Niet alle tabellen in de MERGE tabel hebben identieke gedefinities"
eng "Unable to open underlying table which is differently defined or of non-MyISAM type or doesn't exist"
@@ -3799,7 +3747,7 @@ ER_WRONG_MRG_TABLE
ger "Nicht alle Tabellen in der MERGE-Tabelle sind gleich definiert"
hun "A MERGE tablaban talalhato tablak definicioja nem azonos"
ita "Non tutte le tabelle nella tabella di MERGE sono definite in maniera identica"
- jpn "All tables in the MERGE table are not defined identically"
+ jpn "MERGE表の構成表がオープンできません。列定義が異なるか、MyISAM表ではないか、存在しません。"
kor "All tables in the MERGE table are not defined identically"
nor "All tables in the MERGE table are not defined identically"
norwegian-ny "All tables in the MERGE table are not defined identically"
@@ -3813,7 +3761,7 @@ ER_WRONG_MRG_TABLE
swe "Tabellerna i MERGE-tabellen är inte identiskt definierade"
ukr "Таблиці у MERGE TABLE мають різну структуру"
ER_DUP_UNIQUE 23000
- cze "Kv-Bůli unique constraintu nemozu zapsat do tabulky '%-.192s'"
+ cze "Kvůli unique constraintu nemozu zapsat do tabulky '%-.192s'"
dan "Kan ikke skrive til tabellen '%-.192s' fordi det vil bryde CONSTRAINT regler"
nla "Kan niet opslaan naar table '%-.192s' vanwege 'unique' beperking"
eng "Can't write, because of unique constraint, to table '%-.192s'"
@@ -3821,6 +3769,7 @@ ER_DUP_UNIQUE 23000
fre "Écriture impossible à cause d'un index UNIQUE sur la table '%-.192s'"
ger "Schreiben in Tabelle '%-.192s' nicht möglich wegen einer Eindeutigkeitsbeschränkung (unique constraint)"
hun "A '%-.192s' nem irhato, az egyedi mezok miatt"
+ jpn "一意性制約違反のため、表 '%-.192s' に書き込めません。"
ita "Impossibile scrivere nella tabella '%-.192s' per limitazione di unicita`"
por "Não pode gravar, devido à restrição UNIQUE, na tabela '%-.192s'"
rum "Nu pot scrie pe hard-drive, din cauza constraintului unic (unique constraint) pentru tabela '%-.192s'"
@@ -3830,7 +3779,7 @@ ER_DUP_UNIQUE 23000
swe "Kan inte skriva till tabell '%-.192s'; UNIQUE-test"
ukr "Не можу записати до таблиці '%-.192s', з причини вимог унікальності"
ER_BLOB_KEY_WITHOUT_LENGTH 42000
- cze "BLOB sloupec '%-.192s' je pou-Bžit ve specifikaci klíče bez délky"
+ cze "BLOB sloupec '%-.192s' je použit ve specifikaci klíče bez délky"
dan "BLOB kolonnen '%-.192s' brugt i nøglespecifikation uden nøglelængde"
nla "BLOB kolom '%-.192s' gebruikt in zoeksleutel specificatie zonder zoeksleutel lengte"
eng "BLOB/TEXT column '%-.192s' used in key specification without a key length"
@@ -3840,7 +3789,7 @@ ER_BLOB_KEY_WITHOUT_LENGTH 42000
greek "BLOB column '%-.192s' used in key specification without a key length"
hun "BLOB mezo '%-.192s' hasznalt a mezo specifikacioban, a mezohossz megadasa nelkul"
ita "La colonna '%-.192s' di tipo BLOB e` usata in una chiave senza specificarne la lunghezza"
- jpn "BLOB column '%-.192s' used in key specification without a key length"
+ jpn "BLOB列 '%-.192s' をキーに使用するには長さ指定が必要です。"
kor "BLOB column '%-.192s' used in key specification without a key length"
nor "BLOB column '%-.192s' used in key specification without a key length"
norwegian-ny "BLOB column '%-.192s' used in key specification without a key length"
@@ -3854,7 +3803,7 @@ ER_BLOB_KEY_WITHOUT_LENGTH 42000
swe "Du har inte angett någon nyckellängd för BLOB '%-.192s'"
ukr "Стовбець BLOB '%-.192s' використано у визначенні ключа без вказання довжини ключа"
ER_PRIMARY_CANT_HAVE_NULL 42000
- cze "V-Bšechny části primárního klíče musejí být NOT NULL; pokud potřebujete NULL, použijte UNIQUE"
+ cze "Všechny části primárního klíče musejí být NOT NULL; pokud potřebujete NULL, použijte UNIQUE"
dan "Alle dele af en PRIMARY KEY skal være NOT NULL; Hvis du skal bruge NULL i nøglen, brug UNIQUE istedet"
nla "Alle delen van een PRIMARY KEY moeten NOT NULL zijn; Indien u NULL in een zoeksleutel nodig heeft kunt u UNIQUE gebruiken"
eng "All parts of a PRIMARY KEY must be NOT NULL; if you need NULL in a key, use UNIQUE instead"
@@ -3863,6 +3812,7 @@ ER_PRIMARY_CANT_HAVE_NULL 42000
ger "Alle Teile eines PRIMARY KEY müssen als NOT NULL definiert sein. Wenn NULL in einem Schlüssel benötigt wird, muss ein UNIQUE-Schlüssel verwendet werden"
hun "Az elsodleges kulcs teljes egeszeben csak NOT NULL tipusu lehet; Ha NULL mezot szeretne a kulcskent, hasznalja inkabb a UNIQUE-ot"
ita "Tutte le parti di una chiave primaria devono essere dichiarate NOT NULL; se necessitano valori NULL nelle chiavi utilizzare UNIQUE"
+ jpn "PRIMARY KEYの列は全てNOT NULLでなければいけません。UNIQUE索引であればNULLを含むことが可能です。"
por "Todas as partes de uma chave primária devem ser não-nulas. Se você precisou usar um valor nulo (NULL) em uma chave, use a cláusula UNIQUE em seu lugar"
rum "Toate partile unei chei primare (PRIMARY KEY) trebuie sa fie NOT NULL; Daca aveti nevoie de NULL in vreo cheie, folositi UNIQUE in schimb"
rus "Все части первичного ключа (PRIMARY KEY) должны быть определены как NOT NULL; Если вам нужна поддержка величин NULL в ключе, воспользуйтесь индексом UNIQUE"
@@ -3871,7 +3821,7 @@ ER_PRIMARY_CANT_HAVE_NULL 42000
swe "Alla delar av en PRIMARY KEY måste vara NOT NULL; Om du vill ha en nyckel med NULL, använd UNIQUE istället"
ukr "Усі частини PRIMARY KEY повинні бути NOT NULL; Якщо ви потребуєте NULL у ключі, скористайтеся UNIQUE"
ER_TOO_MANY_ROWS 42000
- cze "V-Býsledek obsahuje více než jeden řádek"
+ cze "Výsledek obsahuje více než jeden řádek"
dan "Resultatet bestod af mere end een række"
nla "Resultaat bevatte meer dan een rij"
eng "Result consisted of more than one row"
@@ -3880,6 +3830,7 @@ ER_TOO_MANY_ROWS 42000
ger "Ergebnis besteht aus mehr als einer Zeile"
hun "Az eredmeny tobb, mint egy sort tartalmaz"
ita "Il risultato consiste di piu` di una riga"
+ jpn "結果が2行以上です。"
por "O resultado consistiu em mais do que uma linha"
rum "Resultatul constista din mai multe linii"
rus "В результате возвращена более чем одна строка"
@@ -3888,7 +3839,7 @@ ER_TOO_MANY_ROWS 42000
swe "Resultet bestod av mera än en rad"
ukr "Результат знаходиться у більше ніж одній строці"
ER_REQUIRES_PRIMARY_KEY 42000
- cze "Tento typ tabulky vy-Bžaduje primární klíč"
+ cze "Tento typ tabulky vyžaduje primární klíč"
dan "Denne tabeltype kræver en primærnøgle"
nla "Dit tabel type heeft een primaire zoeksleutel nodig"
eng "This table type requires a primary key"
@@ -3897,6 +3848,7 @@ ER_REQUIRES_PRIMARY_KEY 42000
ger "Dieser Tabellentyp benötigt einen Primärschlüssel (PRIMARY KEY)"
hun "Az adott tablatipushoz elsodleges kulcs hasznalata kotelezo"
ita "Questo tipo di tabella richiede una chiave primaria"
+ jpn "使用のストレージエンジンでは、PRIMARY KEYが必要です。"
por "Este tipo de tabela requer uma chave primária"
rum "Aceast tip de tabela are nevoie de o cheie primara"
rus "Этот тип таблицы требует определения первичного ключа"
@@ -3905,7 +3857,7 @@ ER_REQUIRES_PRIMARY_KEY 42000
swe "Denna tabelltyp kräver en PRIMARY KEY"
ukr "Цей тип таблиці потребує первинного ключа"
ER_NO_RAID_COMPILED
- cze "Tato verze MariaDB nen-Bí zkompilována s podporou RAID"
+ cze "Tato verze MariaDB není zkompilována s podporou RAID"
dan "Denne udgave af MariaDB er ikke oversat med understøttelse af RAID"
nla "Deze versie van MariaDB is niet gecompileerd met RAID ondersteuning"
eng "This version of MariaDB is not compiled with RAID support"
@@ -3914,6 +3866,7 @@ ER_NO_RAID_COMPILED
ger "Diese MariaDB-Version ist nicht mit RAID-Unterstützung kompiliert"
hun "Ezen leforditott MariaDB verzio nem tartalmaz RAID support-ot"
ita "Questa versione di MYSQL non e` compilata con il supporto RAID"
+ jpn "このバージョンのMariaDBはRAIDサポートを含めてコンパイルされていません。"
por "Esta versão do MariaDB não foi compilada com suporte a RAID"
rum "Aceasta versiune de MariaDB, nu a fost compilata cu suport pentru RAID"
rus "Эта версия MariaDB скомпилирована без поддержки RAID"
@@ -3922,7 +3875,7 @@ ER_NO_RAID_COMPILED
swe "Denna version av MariaDB är inte kompilerad med RAID"
ukr "Ця версія MariaDB не зкомпільована з підтримкою RAID"
ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
- cze "Update tabulky bez WHERE s kl-Bíčem není v módu bezpečných update dovoleno"
+ cze "Update tabulky bez WHERE s klíčem není v módu bezpečných update dovoleno"
dan "Du bruger sikker opdaterings modus ('safe update mode') og du forsøgte at opdatere en tabel uden en WHERE klausul, der gør brug af et KEY felt"
nla "U gebruikt 'safe update mode' en u probeerde een tabel te updaten zonder een WHERE met een KEY kolom"
eng "You are using safe update mode and you tried to update a table without a WHERE that uses a KEY column"
@@ -3931,6 +3884,7 @@ ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
ger "MariaDB läuft im sicheren Aktualisierungsmodus (safe update mode). Sie haben versucht, eine Tabelle zu aktualisieren, ohne in der WHERE-Klausel ein KEY-Feld anzugeben"
hun "On a biztonsagos update modot hasznalja, es WHERE that uses a KEY column"
ita "In modalita` 'safe update' si e` cercato di aggiornare una tabella senza clausola WHERE su una chiave"
+ jpn "'safe update mode'で、索引を利用するWHERE句の無い更新処理を実行しようとしました。"
por "Você está usando modo de atualização seguro e tentou atualizar uma tabela sem uma cláusula WHERE que use uma coluna chave"
rus "Вы работаете в режиме безопасных обновлений (safe update mode) и попробовали изменить таблицу без использования ключевого столбца в части WHERE"
serbian "Vi koristite safe update mod servera, a probali ste da promenite podatke bez 'WHERE' komande koja koristi kolonu ključa"
@@ -3938,7 +3892,7 @@ ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
swe "Du använder 'säker uppdateringsmod' och försökte uppdatera en tabell utan en WHERE-sats som använder sig av en nyckel"
ukr "Ви у режимі безпечного оновлення та намагаєтесь оновити таблицю без оператора WHERE, що використовує KEY стовбець"
ER_KEY_DOES_NOT_EXITS 42000 S1009
- cze "Kl-Bíč '%-.192s' v tabulce '%-.192s' neexistuje"
+ cze "Klíč '%-.192s' v tabulce '%-.192s' neexistuje"
dan "Nøglen '%-.192s' eksisterer ikke i tabellen '%-.192s'"
nla "Zoeksleutel '%-.192s' bestaat niet in tabel '%-.192s'"
eng "Key '%-.192s' doesn't exist in table '%-.192s'"
@@ -3947,6 +3901,7 @@ ER_KEY_DOES_NOT_EXITS 42000 S1009
ger "Schlüssel '%-.192s' existiert in der Tabelle '%-.192s' nicht"
hun "A '%-.192s' kulcs nem letezik a '%-.192s' tablaban"
ita "La chiave '%-.192s' non esiste nella tabella '%-.192s'"
+ jpn "索引 '%-.192s' は表 '%-.192s' には存在しません。"
por "Chave '%-.192s' não existe na tabela '%-.192s'"
rus "Ключ '%-.192s' не существует в таблице '%-.192s'"
serbian "Ključ '%-.192s' ne postoji u tabeli '%-.192s'"
@@ -3954,7 +3909,7 @@ ER_KEY_DOES_NOT_EXITS 42000 S1009
swe "Nyckel '%-.192s' finns inte in tabell '%-.192s'"
ukr "Ключ '%-.192s' не існує в таблиці '%-.192s'"
ER_CHECK_NO_SUCH_TABLE 42000
- cze "Nemohu otev-Břít tabulku"
+ cze "Nemohu otevřít tabulku"
dan "Kan ikke åbne tabellen"
nla "Kan tabel niet openen"
eng "Can't open table"
@@ -3963,6 +3918,7 @@ ER_CHECK_NO_SUCH_TABLE 42000
ger "Kann Tabelle nicht öffnen"
hun "Nem tudom megnyitni a tablat"
ita "Impossibile aprire la tabella"
+ jpn "表をオープンできません。"
por "Não pode abrir a tabela"
rus "Невозможно открыть таблицу"
serbian "Ne mogu da otvorim tabelu"
@@ -3980,7 +3936,7 @@ ER_CHECK_NOT_IMPLEMENTED 42000
greek "The handler for the table doesn't support %s"
hun "A tabla kezeloje (handler) nem tamogatja az %s"
ita "Il gestore per la tabella non supporta il %s"
- jpn "The handler for the table doesn't support %s"
+ jpn "この表のストレージエンジンは '%s' を利用できません。"
kor "The handler for the table doesn't support %s"
nor "The handler for the table doesn't support %s"
norwegian-ny "The handler for the table doesn't support %s"
@@ -3994,7 +3950,7 @@ ER_CHECK_NOT_IMPLEMENTED 42000
swe "Tabellhanteraren för denna tabell kan inte göra %s"
ukr "Вказівник таблиці не підтримуе %s"
ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
- cze "Proveden-Bí tohoto příkazu není v transakci dovoleno"
+ cze "Provedení tohoto příkazu není v transakci dovoleno"
dan "Du må ikke bruge denne kommando i en transaktion"
nla "Het is u niet toegestaan dit commando uit te voeren binnen een transactie"
eng "You are not allowed to execute this command in a transaction"
@@ -4003,6 +3959,7 @@ ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
ger "Sie dürfen diesen Befehl nicht in einer Transaktion ausführen"
hun "Az On szamara nem engedelyezett a parancs vegrehajtasa a tranzakcioban"
ita "Non puoi eseguire questo comando in una transazione"
+ jpn "このコマンドはトランザクション内で実行できません。"
por "Não lhe é permitido executar este comando em uma transação"
rus "Вам не разрешено выполнять эту команду в транзакции"
serbian "Nije Vam dozvoljeno da izvršite ovu komandu u transakciji"
@@ -4010,7 +3967,7 @@ ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
swe "Du får inte utföra detta kommando i en transaktion"
ukr "Вам не дозволено виконувати цю команду в транзакції"
ER_ERROR_DURING_COMMIT
- cze "Chyba %M p-Bři COMMIT"
+ cze "Chyba %M při COMMIT"
dan "Modtog fejl %M mens kommandoen COMMIT blev udført"
nla "Kreeg fout %M tijdens COMMIT"
eng "Got error %M during COMMIT"
@@ -4019,6 +3976,7 @@ ER_ERROR_DURING_COMMIT
ger "Fehler %M beim COMMIT"
hun "%M hiba a COMMIT vegrehajtasa soran"
ita "Rilevato l'errore %M durante il COMMIT"
+ jpn "COMMIT中にエラー %M が発生しました。"
por "Obteve erro %M durante COMMIT"
rus "Получена ошибка %M в процессе COMMIT"
serbian "Greška %M za vreme izvršavanja komande 'COMMIT'"
@@ -4026,7 +3984,7 @@ ER_ERROR_DURING_COMMIT
swe "Fick fel %M vid COMMIT"
ukr "Отримано помилку %M під час COMMIT"
ER_ERROR_DURING_ROLLBACK
- cze "Chyba %M p-Bři ROLLBACK"
+ cze "Chyba %M při ROLLBACK"
dan "Modtog fejl %M mens kommandoen ROLLBACK blev udført"
nla "Kreeg fout %M tijdens ROLLBACK"
eng "Got error %M during ROLLBACK"
@@ -4035,6 +3993,7 @@ ER_ERROR_DURING_ROLLBACK
ger "Fehler %M beim ROLLBACK"
hun "%M hiba a ROLLBACK vegrehajtasa soran"
ita "Rilevato l'errore %M durante il ROLLBACK"
+ jpn "ROLLBACK中にエラー %M が発生しました。"
por "Obteve erro %M durante ROLLBACK"
rus "Получена ошибка %M в процессе ROLLBACK"
serbian "Greška %M za vreme izvršavanja komande 'ROLLBACK'"
@@ -4042,7 +4001,7 @@ ER_ERROR_DURING_ROLLBACK
swe "Fick fel %M vid ROLLBACK"
ukr "Отримано помилку %M під час ROLLBACK"
ER_ERROR_DURING_FLUSH_LOGS
- cze "Chyba %M p-Bři FLUSH_LOGS"
+ cze "Chyba %M při FLUSH_LOGS"
dan "Modtog fejl %M mens kommandoen FLUSH_LOGS blev udført"
nla "Kreeg fout %M tijdens FLUSH_LOGS"
eng "Got error %M during FLUSH_LOGS"
@@ -4051,6 +4010,7 @@ ER_ERROR_DURING_FLUSH_LOGS
ger "Fehler %M bei FLUSH_LOGS"
hun "%M hiba a FLUSH_LOGS vegrehajtasa soran"
ita "Rilevato l'errore %M durante il FLUSH_LOGS"
+ jpn "FLUSH_LOGS中にエラー %M が発生しました。"
por "Obteve erro %M durante FLUSH_LOGS"
rus "Получена ошибка %M в процессе FLUSH_LOGS"
serbian "Greška %M za vreme izvršavanja komande 'FLUSH_LOGS'"
@@ -4058,7 +4018,7 @@ ER_ERROR_DURING_FLUSH_LOGS
swe "Fick fel %M vid FLUSH_LOGS"
ukr "Отримано помилку %M під час FLUSH_LOGS"
ER_ERROR_DURING_CHECKPOINT
- cze "Chyba %M p-Bři CHECKPOINT"
+ cze "Chyba %M při CHECKPOINT"
dan "Modtog fejl %M mens kommandoen CHECKPOINT blev udført"
nla "Kreeg fout %M tijdens CHECKPOINT"
eng "Got error %M during CHECKPOINT"
@@ -4067,6 +4027,7 @@ ER_ERROR_DURING_CHECKPOINT
ger "Fehler %M bei CHECKPOINT"
hun "%M hiba a CHECKPOINT vegrehajtasa soran"
ita "Rilevato l'errore %M durante il CHECKPOINT"
+ jpn "CHECKPOINT中にエラー %M が発生しました。"
por "Obteve erro %M durante CHECKPOINT"
rus "Получена ошибка %M в процессе CHECKPOINT"
serbian "Greška %M za vreme izvršavanja komande 'CHECKPOINT'"
@@ -4074,7 +4035,7 @@ ER_ERROR_DURING_CHECKPOINT
swe "Fick fel %M vid CHECKPOINT"
ukr "Отримано помилку %M під час CHECKPOINT"
ER_NEW_ABORTING_CONNECTION 08S01
- cze "Spojen-Bí %ld do databáze: '%-.192s' uživatel: '%-.48s' stroj: '%-.64s' (%-.64s) bylo přerušeno"
+ cze "Spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' stroj: '%-.64s' (%-.64s) bylo přerušeno"
dan "Afbrød forbindelsen %ld til databasen '%-.192s' bruger: '%-.48s' vært: '%-.64s' (%-.64s)"
nla "Afgebroken verbinding %ld naar db: '%-.192s' gebruiker: '%-.48s' host: '%-.64s' (%-.64s)"
eng "Aborted connection %ld to db: '%-.192s' user: '%-.48s' host: '%-.64s' (%-.64s)"
@@ -4082,6 +4043,7 @@ ER_NEW_ABORTING_CONNECTION 08S01
fre "Connection %ld avortée vers la bd: '%-.192s' utilisateur: '%-.48s' hôte: '%-.64s' (%-.64s)"
ger "Abbruch der Verbindung %ld zur Datenbank '%-.192s'. Benutzer: '%-.48s', Host: '%-.64s' (%-.64s)"
ita "Interrotta la connessione %ld al db: ''%-.192s' utente: '%-.48s' host: '%-.64s' (%-.64s)"
+ jpn "接続 %ld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' ホスト: '%-.64s' (%-.64s)"
por "Conexão %ld abortada para banco de dados '%-.192s' - usuário '%-.48s' - 'host' '%-.64s' ('%-.64s')"
rus "Прервано соединение %ld к базе данных '%-.192s' пользователя '%-.48s' с хоста '%-.64s' (%-.64s)"
serbian "Prekinuta konekcija broj %ld ka bazi: '%-.192s' korisnik je bio: '%-.48s' a host: '%-.64s' (%-.64s)"
@@ -4089,14 +4051,14 @@ ER_NEW_ABORTING_CONNECTION 08S01
swe "Avbröt länken för tråd %ld till db '%-.192s', användare '%-.48s', host '%-.64s' (%-.64s)"
ukr "Перервано з'єднання %ld до бази данних: '%-.192s' користувач: '%-.48s' хост: '%-.64s' (%-.64s)"
ER_DUMP_NOT_IMPLEMENTED
- cze "Handler tabulky nepodporuje bin-Bární dump"
+ cze "Handler tabulky nepodporuje binární dump"
        dan "Denne tabeltype understøtter ikke binært tabeldump"
nla "De 'handler' voor de tabel ondersteund geen binaire tabel dump"
eng "The storage engine for the table does not support binary table dump"
fre "Ce type de table ne supporte pas les copies binaires"
ger "Die Speicher-Engine für die Tabelle unterstützt keinen binären Tabellen-Dump"
ita "Il gestore per la tabella non supporta il dump binario"
- jpn "The handler for the table does not support binary table dump"
+ jpn "この表のストレージエンジンはバイナリ形式の表ダンプを利用できません。"
por "O manipulador de tabela não suporta 'dump' binário de tabela"
rum "The handler for the table does not support binary table dump"
rus "Обработчик этой таблицы не поддерживает двоичного сохранения образа таблицы (dump)"
@@ -4107,12 +4069,13 @@ ER_DUMP_NOT_IMPLEMENTED
ER_FLUSH_MASTER_BINLOG_CLOSED
eng "Binlog closed, cannot RESET MASTER"
ger "Binlog geschlossen. Kann RESET MASTER nicht ausführen"
+ jpn "バイナリログがクローズされています。RESET MASTER を実行できません。"
por "Binlog fechado. Não pode fazer RESET MASTER"
rus "Двоичный журнал обновления закрыт, невозможно выполнить RESET MASTER"
serbian "Binarni log file zatvoren, ne mogu da izvršim komandu 'RESET MASTER'"
ukr "Реплікаційний лог закрито, не можу виконати RESET MASTER"
ER_INDEX_REBUILD
- cze "P-Břebudování indexu dumpnuté tabulky '%-.192s' nebylo úspěšné"
+ cze "Přebudování indexu dumpnuté tabulky '%-.192s' nebylo úspěšné"
dan "Kunne ikke genopbygge indekset for den dumpede tabel '%-.192s'"
nla "Gefaald tijdens heropbouw index van gedumpte tabel '%-.192s'"
eng "Failed rebuilding the index of dumped table '%-.192s'"
@@ -4121,6 +4084,7 @@ ER_INDEX_REBUILD
greek "Failed rebuilding the index of dumped table '%-.192s'"
hun "Failed rebuilding the index of dumped table '%-.192s'"
ita "Fallita la ricostruzione dell'indice della tabella copiata '%-.192s'"
+ jpn "ダンプ表 '%-.192s' の索引再構築に失敗しました。"
por "Falhou na reconstrução do índice da tabela 'dumped' '%-.192s'"
rus "Ошибка перестройки индекса сохраненной таблицы '%-.192s'"
serbian "Izgradnja indeksa dump-ovane tabele '%-.192s' nije uspela"
@@ -4134,20 +4098,22 @@ ER_MASTER
fre "Erreur reçue du maître: '%-.64s'"
ger "Fehler vom Master: '%-.64s'"
        ita "Errore dal master: '%-.64s'"
+ jpn "マスターでエラーが発生: '%-.64s'"
por "Erro no 'master' '%-.64s'"
rus "Ошибка от головного сервера: '%-.64s'"
serbian "Greška iz glavnog servera '%-.64s' u klasteru"
spa "Error del master: '%-.64s'"
- swe "Fick en master: '%-.64s'"
+ swe "Fel från master: '%-.64s'"
ukr "Помилка від головного: '%-.64s'"
ER_MASTER_NET_READ 08S01
- cze "S-Bíťová chyba při čtení z masteru"
+ cze "Síťová chyba při čtení z masteru"
dan "Netværksfejl ved læsning fra master"
nla "Net fout tijdens lezen van master"
eng "Net error reading from master"
fre "Erreur de lecture réseau reçue du maître"
ger "Netzfehler beim Lesen vom Master"
ita "Errore di rete durante la ricezione dal master"
+ jpn "マスターからのデータ受信中のネットワークエラー"
por "Erro de rede lendo do 'master'"
rus "Возникла ошибка чтения в процессе коммуникации с головным сервером"
serbian "Greška u primanju mrežnih paketa sa glavnog servera u klasteru"
@@ -4155,13 +4121,14 @@ ER_MASTER_NET_READ 08S01
swe "Fick nätverksfel vid läsning från master"
ukr "Мережева помилка читання від головного"
ER_MASTER_NET_WRITE 08S01
- cze "S-Bíťová chyba při zápisu na master"
+ cze "Síťová chyba při zápisu na master"
dan "Netværksfejl ved skrivning til master"
nla "Net fout tijdens schrijven naar master"
eng "Net error writing to master"
fre "Erreur d'écriture réseau reçue du maître"
ger "Netzfehler beim Schreiben zum Master"
ita "Errore di rete durante l'invio al master"
+ jpn "マスターへのデータ送信中のネットワークエラー"
por "Erro de rede gravando no 'master'"
rus "Возникла ошибка записи в процессе коммуникации с головным сервером"
serbian "Greška u slanju mrežnih paketa na glavni server u klasteru"
@@ -4169,7 +4136,7 @@ ER_MASTER_NET_WRITE 08S01
swe "Fick nätverksfel vid skrivning till master"
ukr "Мережева помилка запису до головного"
ER_FT_MATCHING_KEY_NOT_FOUND
- cze "-BŽádný sloupec nemá vytvořen fulltextový index"
+ cze "Žádný sloupec nemá vytvořen fulltextový index"
dan "Kan ikke finde en FULLTEXT nøgle som svarer til kolonne listen"
nla "Kan geen FULLTEXT index vinden passend bij de kolom lijst"
eng "Can't find FULLTEXT index matching the column list"
@@ -4177,6 +4144,7 @@ ER_FT_MATCHING_KEY_NOT_FOUND
fre "Impossible de trouver un index FULLTEXT correspondant à cette liste de colonnes"
ger "Kann keinen FULLTEXT-Index finden, der der Feldliste entspricht"
ita "Impossibile trovare un indice FULLTEXT che corrisponda all'elenco delle colonne"
+ jpn "列リストに対応する全文索引(FULLTEXT)が見つかりません。"
por "Não pode encontrar um índice para o texto todo que combine com a lista de colunas"
rus "Невозможно отыскать полнотекстовый (FULLTEXT) индекс, соответствующий списку столбцов"
serbian "Ne mogu da pronađem 'FULLTEXT' indeks koli odgovara listi kolona"
@@ -4184,7 +4152,7 @@ ER_FT_MATCHING_KEY_NOT_FOUND
swe "Hittar inte ett FULLTEXT-index i kolumnlistan"
ukr "Не можу знайти FULLTEXT індекс, що відповідає переліку стовбців"
ER_LOCK_OR_ACTIVE_TRANSACTION
- cze "Nemohu prov-Bést zadaný příkaz, protože existují aktivní zamčené tabulky nebo aktivní transakce"
+ cze "Nemohu provést zadaný příkaz, protože existují aktivní zamčené tabulky nebo aktivní transakce"
dan "Kan ikke udføre den givne kommando fordi der findes aktive, låste tabeller eller fordi der udføres en transaktion"
nla "Kan het gegeven commando niet uitvoeren, want u heeft actieve gelockte tabellen of een actieve transactie"
eng "Can't execute the given command because you have active locked tables or an active transaction"
@@ -4192,6 +4160,7 @@ ER_LOCK_OR_ACTIVE_TRANSACTION
fre "Impossible d'exécuter la commande car vous avez des tables verrouillées ou une transaction active"
ger "Kann den angegebenen Befehl wegen einer aktiven Tabellensperre oder einer aktiven Transaktion nicht ausführen"
ita "Impossibile eseguire il comando richiesto: tabelle sotto lock o transazione in atto"
+ jpn "すでにアクティブな表ロックやトランザクションがあるため、コマンドを実行できません。"
por "Não pode executar o comando dado porque você tem tabelas ativas travadas ou uma transação ativa"
rus "Невозможно выполнить указанную команду, поскольку у вас присутствуют активно заблокированные таблица или открытая транзакция"
serbian "Ne mogu da izvršim datu komandu zbog toga što su tabele zaključane ili je transakcija u toku"
@@ -4199,7 +4168,7 @@ ER_LOCK_OR_ACTIVE_TRANSACTION
swe "Kan inte utföra kommandot emedan du har en låst tabell eller an aktiv transaktion"
ukr "Не можу виконати подану команду тому, що таблиця заблокована або виконується транзакція"
ER_UNKNOWN_SYSTEM_VARIABLE
- cze "Nezn-Bámá systémová proměnná '%-.64s'"
+ cze "Neznámá systémová proměnná '%-.64s'"
dan "Ukendt systemvariabel '%-.64s'"
nla "Onbekende systeem variabele '%-.64s'"
eng "Unknown system variable '%-.64s'"
@@ -4207,6 +4176,7 @@ ER_UNKNOWN_SYSTEM_VARIABLE
fre "Variable système '%-.64s' inconnue"
ger "Unbekannte Systemvariable '%-.64s'"
ita "Variabile di sistema '%-.64s' sconosciuta"
+ jpn "'%-.64s' は不明なシステム変数です。"
por "Variável de sistema '%-.64s' desconhecida"
rus "Неизвестная системная переменная '%-.64s'"
serbian "Nepoznata sistemska promenljiva '%-.64s'"
@@ -4214,7 +4184,7 @@ ER_UNKNOWN_SYSTEM_VARIABLE
swe "Okänd systemvariabel: '%-.64s'"
ukr "Невідома системна змінна '%-.64s'"
ER_CRASHED_ON_USAGE
- cze "Tabulka '%-.192s' je ozna-Bčena jako porušená a měla by být opravena"
+ cze "Tabulka '%-.192s' je označena jako porušená a měla by být opravena"
dan "Tabellen '%-.192s' er markeret med fejl og bør repareres"
nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en dient te worden gerepareerd"
eng "Table '%-.192s' is marked as crashed and should be repaired"
@@ -4222,6 +4192,7 @@ ER_CRASHED_ON_USAGE
fre "La table '%-.192s' est marquée 'crashed' et devrait être réparée"
ger "Tabelle '%-.192s' ist als defekt markiert und sollte repariert werden"
ita "La tabella '%-.192s' e` segnalata come corrotta e deve essere riparata"
+ jpn "表 '%-.192s' は壊れています。修復が必要です。"
por "Tabela '%-.192s' está marcada como danificada e deve ser reparada"
rus "Таблица '%-.192s' помечена как испорченная и должна пройти проверку и ремонт"
serbian "Tabela '%-.192s' je markirana kao oštećena i trebala bi biti popravljena"
@@ -4229,7 +4200,7 @@ ER_CRASHED_ON_USAGE
swe "Tabell '%-.192s' är trasig och bör repareras med REPAIR TABLE"
ukr "Таблицю '%-.192s' марковано як зіпсовану та її потрібно відновити"
ER_CRASHED_ON_REPAIR
- cze "Tabulka '%-.192s' je ozna-Bčena jako porušená a poslední (automatická?) oprava se nezdařila"
+ cze "Tabulka '%-.192s' je označena jako porušená a poslední (automatická?) oprava se nezdařila"
dan "Tabellen '%-.192s' er markeret med fejl og sidste (automatiske?) REPAIR fejlede"
nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en de laatste (automatische?) reparatie poging mislukte"
eng "Table '%-.192s' is marked as crashed and last (automatic?) repair failed"
@@ -4237,6 +4208,7 @@ ER_CRASHED_ON_REPAIR
fre "La table '%-.192s' est marquée 'crashed' et le dernier 'repair' a échoué"
ger "Tabelle '%-.192s' ist als defekt markiert und der letzte (automatische?) Reparaturversuch schlug fehl"
ita "La tabella '%-.192s' e` segnalata come corrotta e l'ultima ricostruzione (automatica?) e` fallita"
+ jpn "表 '%-.192s' は壊れています。修復(自動?)にも失敗しています。"
por "Tabela '%-.192s' está marcada como danificada e a última reparação (automática?) falhou"
rus "Таблица '%-.192s' помечена как испорченная и последний (автоматический?) ремонт не был успешным"
serbian "Tabela '%-.192s' je markirana kao oštećena, a zadnja (automatska?) popravka je bila neuspela"
@@ -4251,6 +4223,7 @@ ER_WARNING_NOT_COMPLETE_ROLLBACK
fre "Attention: certaines tables ne supportant pas les transactions ont été changées et elles ne pourront pas être restituées"
ger "Änderungen an einigen nicht transaktionalen Tabellen konnten nicht zurückgerollt werden"
ita "Attenzione: Alcune delle modifiche alle tabelle non transazionali non possono essere ripristinate (roll back impossibile)"
+ jpn "トランザクション対応ではない表への変更はロールバックされません。"
por "Aviso: Algumas tabelas não-transacionais alteradas não puderam ser reconstituídas (rolled back)"
rus "Внимание: по некоторым измененным нетранзакционным таблицам невозможно будет произвести откат транзакции"
serbian "Upozorenje: Neke izmenjene tabele ne podržavaju komandu 'ROLLBACK'"
@@ -4265,6 +4238,7 @@ ER_TRANS_CACHE_FULL
fre "Cette transaction à commandes multiples nécessite plus de 'max_binlog_cache_size' octets de stockage, augmentez cette variable de mysqld et réessayez"
        ger "Transaktionen, die aus mehreren Befehlen bestehen, benötigten mehr als 'max_binlog_cache_size' Bytes an Speicher. Bitte vergrössern Sie diese Server-Variable und versuchen Sie es noch einmal"
ita "La transazione a comandi multipli (multi-statement) ha richiesto piu` di 'max_binlog_cache_size' bytes di disco: aumentare questa variabile di mysqld e riprovare"
+ jpn "複数ステートメントから成るトランザクションが 'max_binlog_cache_size' 以上の容量を必要としました。このシステム変数を増加して、再試行してください。"
por "Transações multi-declaradas (multi-statement transactions) requeriram mais do que o valor limite (max_binlog_cache_size) de bytes para armazenagem. Aumente o valor desta variável do mysqld e tente novamente"
rus "Транзакции, включающей большое количество команд, потребовалось более чем 'max_binlog_cache_size' байт. Увеличьте эту переменную сервера mysqld и попробуйте еще раз"
spa "Multipla transición necesita mas que 'max_binlog_cache_size' bytes de almacenamiento. Aumente esta variable mysqld y tente de nuevo"
@@ -4290,6 +4264,7 @@ ER_SLAVE_NOT_RUNNING
fre "Cette opération nécessite un esclave actif, configurez les esclaves et faites START SLAVE"
ger "Diese Operation benötigt einen aktiven Slave. Bitte Slave konfigurieren und mittels START SLAVE aktivieren"
        ita "Questa operazione richiede un database 'slave', configurarlo ed eseguire START SLAVE"
+ jpn "この処理は、稼働中のスレーブでなければ実行できません。スレーブの設定をしてSTART SLAVEコマンドを実行してください。"
por "Esta operação requer um 'slave' em execução. Configure o 'slave' e execute START SLAVE"
rus "Для этой операции требуется работающий подчиненный сервер. Сначала выполните START SLAVE"
serbian "Ova operacija zahteva da je aktivan podređeni server. Konfigurišite prvo podređeni server i onda izvršite komandu 'START SLAVE'"
@@ -4303,6 +4278,7 @@ ER_BAD_SLAVE
fre "Le server n'est pas configuré comme un esclave, changez le fichier de configuration ou utilisez CHANGE MASTER TO"
ger "Der Server ist nicht als Slave konfiguriert. Bitte in der Konfigurationsdatei oder mittels CHANGE MASTER TO beheben"
ita "Il server non e' configurato come 'slave', correggere il file di configurazione cambiando CHANGE MASTER TO"
+ jpn "このサーバーはスレーブとして設定されていません。コンフィグファイルかCHANGE MASTER TOコマンドで設定して下さい。"
por "O servidor não está configurado como 'slave'. Acerte o arquivo de configuração ou use CHANGE MASTER TO"
rus "Этот сервер не настроен как подчиненный. Внесите исправления в конфигурационном файле или с помощью CHANGE MASTER TO"
serbian "Server nije konfigurisan kao podređeni server, ispravite konfiguracioni file ili na njemu izvršite komandu 'CHANGE MASTER TO'"
@@ -4313,15 +4289,17 @@ ER_MASTER_INFO
eng "Could not initialize master info structure for '%.*s'; more error messages can be found in the MariaDB error log"
fre "Impossible d'initialiser les structures d'information de maître '%.*s', vous trouverez des messages d'erreur supplémentaires dans le journal des erreurs de MariaDB"
ger "Konnte Master-Info-Struktur '%.*s' nicht initialisieren. Weitere Fehlermeldungen können im MariaDB-Error-Log eingesehen werden"
+ jpn "'master info '%.*s''構造体の初期化ができませんでした。MariaDBエラーログでエラーメッセージを確認してください。"
serbian "Nisam mogao da inicijalizujem informacionu strukturu glavnog servera, proverite da li imam privilegije potrebne za pristup file-u 'master.info' '%.*s'"
swe "Kunde inte initialisera replikationsstrukturerna för '%.*s'. See MariaDB fel fil för mera information"
-ER_SLAVE_THREAD
+ER_SLAVE_THREAD
dan "Kunne ikke danne en slave-tråd; check systemressourcerne"
nla "Kon slave thread niet aanmaken, controleer systeem resources"
eng "Could not create slave thread; check system resources"
fre "Impossible de créer une tâche esclave, vérifiez les ressources système"
ger "Konnte Slave-Thread nicht starten. Bitte System-Ressourcen überprüfen"
ita "Impossibile creare il thread 'slave', controllare le risorse di sistema"
+ jpn "スレーブスレッドを作成できません。システムリソースを確認してください。"
por "Não conseguiu criar 'thread' de 'slave'. Verifique os recursos do sistema"
rus "Невозможно создать поток подчиненного сервера. Проверьте системные ресурсы"
serbian "Nisam mogao da startujem thread za podređeni server, proverite sistemske resurse"
@@ -4336,6 +4314,7 @@ ER_TOO_MANY_USER_CONNECTIONS 42000
fre "L'utilisateur %-.64s possède déjà plus de 'max_user_connections' connexions actives"
ger "Benutzer '%-.64s' hat mehr als 'max_user_connections' aktive Verbindungen"
ita "L'utente %-.64s ha gia' piu' di 'max_user_connections' connessioni attive"
+ jpn "ユーザー '%-.64s' はすでに 'max_user_connections' 以上のアクティブな接続を行っています。"
por "Usuário '%-.64s' já possui mais que o valor máximo de conexões (max_user_connections) ativas"
rus "У пользователя %-.64s уже больше чем 'max_user_connections' активных соединений"
serbian "Korisnik %-.64s već ima više aktivnih konekcija nego što je to određeno 'max_user_connections' promenljivom"
@@ -4350,6 +4329,7 @@ ER_SET_CONSTANTS_ONLY
fre "Seules les expressions constantes sont autorisées avec SET"
ger "Bei diesem Befehl dürfen nur konstante Ausdrücke verwendet werden"
ita "Si possono usare solo espressioni costanti con SET"
+ jpn "SET処理が失敗しました。"
por "Você pode usar apenas expressões constantes com SET"
rus "С этой командой вы можете использовать только константные выражения"
serbian "Možete upotrebiti samo konstantan iskaz sa komandom 'SET'"
@@ -4364,6 +4344,7 @@ ER_LOCK_WAIT_TIMEOUT
fre "Timeout sur l'obtention du verrou"
ger "Beim Warten auf eine Sperre wurde die zulässige Wartezeit überschritten. Bitte versuchen Sie, die Transaktion neu zu starten"
ita "E' scaduto il timeout per l'attesa del lock"
+ jpn "ロック待ちがタイムアウトしました。トランザクションを再試行してください。"
por "Tempo de espera (timeout) de travamento excedido. Tente reiniciar a transação."
rus "Таймаут ожидания блокировки истек; попробуйте перезапустить транзакцию"
serbian "Vremenski limit za zaključavanje tabele je istekao; Probajte da ponovo startujete transakciju"
@@ -4378,6 +4359,7 @@ ER_LOCK_TABLE_FULL
fre "Le nombre total de verrou dépasse la taille de la table des verrous"
ger "Die Gesamtzahl der Sperren überschreitet die Größe der Sperrtabelle"
ita "Il numero totale di lock e' maggiore della grandezza della tabella di lock"
+ jpn "ロックの数が多すぎます。"
por "O número total de travamentos excede o tamanho da tabela de travamentos"
rus "Общее количество блокировок превысило размеры таблицы блокировок"
serbian "Broj totalnih zaključavanja tabele premašuje veličinu tabele zaključavanja"
@@ -4392,6 +4374,7 @@ ER_READ_ONLY_TRANSACTION 25000
fre "Un verrou en update ne peut être acquit pendant une transaction READ UNCOMMITTED"
ger "Während einer READ-UNCOMMITTED-Transaktion können keine UPDATE-Sperren angefordert werden"
ita "I lock di aggiornamento non possono essere acquisiti durante una transazione 'READ UNCOMMITTED'"
+ jpn "読み込み専用トランザクションです。"
por "Travamentos de atualização não podem ser obtidos durante uma transação de tipo READ UNCOMMITTED"
rus "Блокировки обновлений нельзя получить в процессе чтения не принятой (в режиме READ UNCOMMITTED) транзакции"
serbian "Zaključavanja izmena ne mogu biti realizovana sve dok traje 'READ UNCOMMITTED' transakcija"
@@ -4406,6 +4389,7 @@ ER_DROP_DB_WITH_READ_LOCK
fre "DROP DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
ger "DROP DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
ita "DROP DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+ jpn "グローバルリードロックを保持している間は、DROP DATABASE を実行できません。"
por "DROP DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
rus "Не допускается DROP DATABASE, пока поток держит глобальную блокировку чтения"
serbian "Komanda 'DROP DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
@@ -4420,6 +4404,7 @@ ER_CREATE_DB_WITH_READ_LOCK
fre "CREATE DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
ger "CREATE DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
ita "CREATE DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+ jpn "グローバルリードロックを保持している間は、CREATE DATABASE を実行できません。"
por "CREATE DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
rus "Не допускается CREATE DATABASE, пока поток держит глобальную блокировку чтения"
serbian "Komanda 'CREATE DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
@@ -4433,6 +4418,7 @@ ER_WRONG_ARGUMENTS
fre "Mauvais arguments à %s"
ger "Falsche Argumente für %s"
ita "Argomenti errati a %s"
+       jpn "%s の引数が不正です。"
por "Argumentos errados para %s"
rus "Неверные параметры для %s"
serbian "Pogrešni argumenti prosleđeni na %s"
@@ -4446,6 +4432,7 @@ ER_NO_PERMISSION_TO_CREATE_USER 42000
fre "'%-.48s'@'%-.64s' n'est pas autorisé à créer de nouveaux utilisateurs"
ger "'%-.48s'@'%-.64s' ist nicht berechtigt, neue Benutzer hinzuzufügen"
ita "A '%-.48s'@'%-.64s' non e' permesso creare nuovi utenti"
+ jpn "'%-.48s'@'%-.64s' は新しいユーザーを作成できません。"
por "Não é permitido a '%-.48s'@'%-.64s' criar novos usuários"
rus "'%-.48s'@'%-.64s' не разрешается создавать новых пользователей"
serbian "Korisniku '%-.48s'@'%-.64s' nije dozvoljeno da kreira nove korisnike"
@@ -4459,6 +4446,7 @@ ER_UNION_TABLES_IN_DIFFERENT_DIR
fre "Définition de table incorrecte; toutes les tables MERGE doivent être dans la même base de donnée"
ger "Falsche Tabellendefinition. Alle MERGE-Tabellen müssen sich in derselben Datenbank befinden"
ita "Definizione della tabella errata; tutte le tabelle di tipo MERGE devono essere nello stesso database"
+ jpn "不正な表定義です。MERGE表の構成表はすべて同じデータベース内になければなりません。"
por "Definição incorreta da tabela. Todas as tabelas contidas na junção devem estar no mesmo banco de dados."
rus "Неверное определение таблицы; Все таблицы в MERGE должны принадлежать одной и той же базе данных"
serbian "Pogrešna definicija tabele; sve 'MERGE' tabele moraju biti u istoj bazi podataka"
@@ -4471,6 +4459,7 @@ ER_LOCK_DEADLOCK 40001
fre "Deadlock découvert en essayant d'obtenir les verrous : essayez de redémarrer la transaction"
ger "Beim Versuch, eine Sperre anzufordern, ist ein Deadlock aufgetreten. Versuchen Sie, die Transaktion neu zu starten"
ita "Trovato deadlock durante il lock; Provare a far ripartire la transazione"
+ jpn "ロック取得中にデッドロックが検出されました。トランザクションを再試行してください。"
por "Encontrado um travamento fatal (deadlock) quando tentava obter uma trava. Tente reiniciar a transação."
rus "Возникла тупиковая ситуация в процессе получения блокировки; Попробуйте перезапустить транзакцию"
serbian "Unakrsno zaključavanje pronađeno kada sam pokušao da dobijem pravo na zaključavanje; Probajte da restartujete transakciju"
@@ -4483,6 +4472,7 @@ ER_TABLE_CANT_HANDLE_FT
fre "Le type de table utilisé ne supporte pas les index FULLTEXT"
ger "Der verwendete Tabellentyp unterstützt keine FULLTEXT-Indizes"
ita "La tabella usata non supporta gli indici FULLTEXT"
+ jpn "使用の表は全文索引を利用できません。"
por "O tipo de tabela utilizado não suporta índices de texto completo (fulltext indexes)"
rus "Используемый тип таблиц не поддерживает полнотекстовых индексов"
serbian "Upotrebljeni tip tabele ne podržava 'FULLTEXT' indekse"
@@ -4495,6 +4485,7 @@ ER_CANNOT_ADD_FOREIGN
fre "Impossible d'ajouter des contraintes d'index externe"
ger "Fremdschlüssel-Beschränkung kann nicht hinzugefügt werden"
ita "Impossibile aggiungere il vincolo di integrita' referenziale (foreign key constraint)"
+ jpn "外部キー制約を追加できません。"
por "Não pode acrescentar uma restrição de chave estrangeira"
rus "Невозможно добавить ограничения внешнего ключа"
serbian "Ne mogu da dodam proveru spoljnog ključa"
@@ -4508,6 +4499,7 @@ ER_NO_REFERENCED_ROW 23000
greek "Cannot add a child row: a foreign key constraint fails"
hun "Cannot add a child row: a foreign key constraint fails"
ita "Impossibile aggiungere la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+ jpn "親キーがありません。外部キー制約違反です。"
norwegian-ny "Cannot add a child row: a foreign key constraint fails"
por "Não pode acrescentar uma linha filha: uma restrição de chave estrangeira falhou"
rus "Невозможно добавить или обновить дочернюю строку: проверка ограничений внешнего ключа не выполняется"
@@ -4520,6 +4512,7 @@ ER_ROW_IS_REFERENCED 23000
greek "Cannot delete a parent row: a foreign key constraint fails"
hun "Cannot delete a parent row: a foreign key constraint fails"
ita "Impossibile cancellare la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+ jpn "子レコードがあります。外部キー制約違反です。"
por "Não pode apagar uma linha pai: uma restrição de chave estrangeira falhou"
rus "Невозможно удалить или обновить родительскую строку: проверка ограничений внешнего ключа не выполняется"
serbian "Ne mogu da izbrišem roditeljski slog: provera spoljnog ključa je neuspela"
@@ -4530,6 +4523,7 @@ ER_CONNECT_TO_MASTER 08S01
eng "Error connecting to master: %-.128s"
ger "Fehler bei der Verbindung zum Master: %-.128s"
ita "Errore durante la connessione al master: %-.128s"
+ jpn "マスターへの接続エラー: %-.128s"
por "Erro conectando com o master: %-.128s"
rus "Ошибка соединения с головным сервером: %-.128s"
spa "Error de coneccion a master: %-.128s"
@@ -4539,6 +4533,7 @@ ER_QUERY_ON_MASTER
eng "Error running query on master: %-.128s"
ger "Beim Ausführen einer Abfrage auf dem Master trat ein Fehler auf: %-.128s"
ita "Errore eseguendo una query sul master: %-.128s"
+ jpn "マスターでのクエリ実行エラー: %-.128s"
por "Erro rodando consulta no master: %-.128s"
rus "Ошибка выполнения запроса на головном сервере: %-.128s"
spa "Error executando el query en master: %-.128s"
@@ -4549,6 +4544,7 @@ ER_ERROR_WHEN_EXECUTING_COMMAND
est "Viga käsu %s täitmisel: %-.128s"
ger "Fehler beim Ausführen des Befehls %s: %-.128s"
ita "Errore durante l'esecuzione del comando %s: %-.128s"
+ jpn "%s コマンドの実行エラー: %-.128s"
por "Erro quando executando comando %s: %-.128s"
rus "Ошибка при выполнении команды %s: %-.128s"
serbian "Greška pri izvršavanju komande %s: %-.128s"
@@ -4560,6 +4556,7 @@ ER_WRONG_USAGE
est "Vigane %s ja %s kasutus"
ger "Falsche Verwendung von %s und %s"
ita "Uso errato di %s e %s"
+ jpn "%s の %s に関する不正な使用法です。"
por "Uso errado de %s e %s"
rus "Неверное использование %s и %s"
serbian "Pogrešna upotreba %s i %s"
@@ -4572,6 +4569,7 @@ ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT 21000
est "Tulpade arv kasutatud SELECT lausetes ei kattu"
ger "Die verwendeten SELECT-Befehle liefern unterschiedliche Anzahlen von Feldern zurück"
ita "La SELECT utilizzata ha un numero di colonne differente"
+ jpn "使用のSELECT文が返す列数が違います。"
por "Os comandos SELECT usados têm diferente número de colunas"
rus "Использованные операторы выборки (SELECT) дают разное количество столбцов"
serbian "Upotrebljene 'SELECT' komande adresiraju različit broj kolona"
@@ -4583,6 +4581,7 @@ ER_CANT_UPDATE_WITH_READLOCK
est "Ei suuda täita päringut konfliktse luku tõttu"
ger "Augrund eines READ-LOCK-Konflikts kann die Abfrage nicht ausgeführt werden"
        ita "Impossibile eseguire la query perche' c'e' un conflitto con un lock di lettura"
+ jpn "競合するリードロックを保持しているので、クエリを実行できません。"
por "Não posso executar a consulta porque você tem um conflito de travamento de leitura"
rus "Невозможно исполнить запрос, поскольку у вас установлены конфликтующие блокировки чтения"
serbian "Ne mogu da izvršim upit zbog toga što imate zaključavanja čitanja podataka u konfliktu"
@@ -4594,6 +4593,7 @@ ER_MIXING_NOT_ALLOWED
est "Transaktsioone toetavate ning mittetoetavate tabelite kooskasutamine ei ole lubatud"
ger "Die gleichzeitige Verwendung von Tabellen mit und ohne Transaktionsunterstützung ist deaktiviert"
ita "E' disabilitata la possibilita' di mischiare tabelle transazionali e non-transazionali"
+ jpn "トランザクション対応の表と非対応の表の同時使用は無効化されています。"
por "Mistura de tabelas transacional e não-transacional está desabilitada"
rus "Использование транзакционных таблиц наряду с нетранзакционными запрещено"
serbian "Mešanje tabela koje podržavaju transakcije i onih koje ne podržavaju transakcije je isključeno"
@@ -4605,6 +4605,7 @@ ER_DUP_ARGUMENT
est "Määrangut '%s' on lauses kasutatud topelt"
ger "Option '%s' wird im Befehl zweimal verwendet"
ita "L'opzione '%s' e' stata usata due volte nel comando"
+ jpn "オプション '%s' が2度使用されています。"
por "Opção '%s' usada duas vezes no comando"
rus "Опция '%s' дважды использована в выражении"
spa "Opción '%s' usada dos veces en el comando"
@@ -4614,6 +4615,7 @@ ER_USER_LIMIT_REACHED 42000
eng "User '%-.64s' has exceeded the '%s' resource (current value: %ld)"
ger "Benutzer '%-.64s' hat die Ressourcenbeschränkung '%s' überschritten (aktueller Wert: %ld)"
ita "L'utente '%-.64s' ha ecceduto la risorsa '%s' (valore corrente: %ld)"
+ jpn "ユーザー '%-.64s' はリソースの上限 '%s' に達しました。(現在値: %ld)"
por "Usuário '%-.64s' tem excedido o '%s' recurso (atual valor: %ld)"
rus "Пользователь '%-.64s' превысил использование ресурса '%s' (текущее значение: %ld)"
spa "Usuario '%-.64s' ha excedido el recurso '%s' (actual valor: %ld)"
@@ -4623,6 +4625,7 @@ ER_SPECIFIC_ACCESS_DENIED_ERROR 42000
eng "Access denied; you need (at least one of) the %-.128s privilege(s) for this operation"
ger "Kein Zugriff. Hierfür wird die Berechtigung %-.128s benötigt"
ita "Accesso non consentito. Serve il privilegio %-.128s per questa operazione"
+ jpn "アクセスは拒否されました。この操作には %-.128s 権限が(複数の場合はどれか1つ)必要です。"
por "Acesso negado. Você precisa o privilégio %-.128s para essa operação"
rus "В доступе отказано. Вам нужны привилегии %-.128s для этой операции"
spa "Acceso negado. Usted necesita el privilegio %-.128s para esta operación"
@@ -4633,6 +4636,7 @@ ER_LOCAL_VARIABLE
eng "Variable '%-.64s' is a SESSION variable and can't be used with SET GLOBAL"
ger "Variable '%-.64s' ist eine lokale Variable und kann nicht mit SET GLOBAL verändert werden"
ita "La variabile '%-.64s' e' una variabile locale ( SESSION ) e non puo' essere cambiata usando SET GLOBAL"
+ jpn "変数 '%-.64s' はセッション変数です。SET GLOBALでは使用できません。"
por "Variável '%-.64s' é uma SESSION variável e não pode ser usada com SET GLOBAL"
rus "Переменная '%-.64s' является потоковой (SESSION) переменной и не может быть изменена с помощью SET GLOBAL"
spa "Variable '%-.64s' es una SESSION variable y no puede ser usada con SET GLOBAL"
@@ -4642,6 +4646,7 @@ ER_GLOBAL_VARIABLE
eng "Variable '%-.64s' is a GLOBAL variable and should be set with SET GLOBAL"
ger "Variable '%-.64s' ist eine globale Variable und muss mit SET GLOBAL verändert werden"
ita "La variabile '%-.64s' e' una variabile globale ( GLOBAL ) e deve essere cambiata usando SET GLOBAL"
+ jpn "変数 '%-.64s' はグローバル変数です。SET GLOBALを使用してください。"
por "Variável '%-.64s' é uma GLOBAL variável e deve ser configurada com SET GLOBAL"
rus "Переменная '%-.64s' является глобальной (GLOBAL) переменной, и ее следует изменять с помощью SET GLOBAL"
spa "Variable '%-.64s' es una GLOBAL variable y no puede ser configurada con SET GLOBAL"
@@ -4651,6 +4656,7 @@ ER_NO_DEFAULT 42000
eng "Variable '%-.64s' doesn't have a default value"
ger "Variable '%-.64s' hat keinen Vorgabewert"
ita "La variabile '%-.64s' non ha un valore di default"
+ jpn "変数 '%-.64s' にはデフォルト値がありません。"
por "Variável '%-.64s' não tem um valor padrão"
rus "Переменная '%-.64s' не имеет значения по умолчанию"
spa "Variable '%-.64s' no tiene un valor patrón"
@@ -4660,6 +4666,7 @@ ER_WRONG_VALUE_FOR_VAR 42000
eng "Variable '%-.64s' can't be set to the value of '%-.200s'"
ger "Variable '%-.64s' kann nicht auf '%-.200s' gesetzt werden"
        ita "Alla variabile '%-.64s' non puo' essere assegnato il valore '%-.200s'"
+ jpn "変数 '%-.64s' に値 '%-.200s' を設定できません。"
por "Variável '%-.64s' não pode ser configurada para o valor de '%-.200s'"
rus "Переменная '%-.64s' не может быть установлена в значение '%-.200s'"
spa "Variable '%-.64s' no puede ser configurada para el valor de '%-.200s'"
@@ -4669,6 +4676,7 @@ ER_WRONG_TYPE_FOR_VAR 42000
eng "Incorrect argument type to variable '%-.64s'"
ger "Falscher Argumenttyp für Variable '%-.64s'"
ita "Tipo di valore errato per la variabile '%-.64s'"
+ jpn "変数 '%-.64s' への値の型が不正です。"
por "Tipo errado de argumento para variável '%-.64s'"
rus "Неверный тип аргумента для переменной '%-.64s'"
spa "Tipo de argumento equivocado para variable '%-.64s'"
@@ -4678,6 +4686,7 @@ ER_VAR_CANT_BE_READ
eng "Variable '%-.64s' can only be set, not read"
ger "Variable '%-.64s' kann nur verändert, nicht gelesen werden"
ita "Alla variabile '%-.64s' e' di sola scrittura quindi puo' essere solo assegnato un valore, non letto"
+ jpn "変数 '%-.64s' は書き込み専用です。読み込みはできません。"
por "Variável '%-.64s' somente pode ser configurada, não lida"
rus "Переменная '%-.64s' может быть только установлена, но не считана"
spa "Variable '%-.64s' solamente puede ser configurada, no leída"
@@ -4687,6 +4696,7 @@ ER_CANT_USE_OPTION_HERE 42000
eng "Incorrect usage/placement of '%s'"
ger "Falsche Verwendung oder Platzierung von '%s'"
ita "Uso/posizione di '%s' sbagliato"
+ jpn "'%s' の使用法または場所が不正です。"
por "Errado uso/colocação de '%s'"
rus "Неверное использование или в неверном месте указан '%s'"
spa "Equivocado uso/colocación de '%s'"
@@ -4696,6 +4706,7 @@ ER_NOT_SUPPORTED_YET 42000
eng "This version of MariaDB doesn't yet support '%s'"
ger "Diese MariaDB-Version unterstützt '%s' nicht"
ita "Questa versione di MariaDB non supporta ancora '%s'"
+ jpn "このバージョンのMariaDBでは、まだ '%s' を利用できません。"
por "Esta versão de MariaDB não suporta ainda '%s'"
rus "Эта версия MariaDB пока еще не поддерживает '%s'"
spa "Esta versión de MariaDB no soporta todavia '%s'"
@@ -4705,6 +4716,7 @@ ER_MASTER_FATAL_ERROR_READING_BINLOG
eng "Got fatal error %d from master when reading data from binary log: '%-.320s'"
ger "Schwerer Fehler %d: '%-.320s vom Master beim Lesen des binären Logs"
ita "Errore fatale %d: '%-.320s' dal master leggendo i dati dal log binario"
+ jpn "致命的なエラー %d: '%-.320s' がマスターでバイナリログ読み込み中に発生しました。"
por "Obteve fatal erro %d: '%-.320s' do master quando lendo dados do binary log"
rus "Получена неисправимая ошибка %d: '%-.320s' от головного сервера в процессе выборки данных из двоичного журнала"
spa "Recibió fatal error %d: '%-.320s' del master cuando leyendo datos del binary log"
@@ -4712,6 +4724,7 @@ ER_MASTER_FATAL_ERROR_READING_BINLOG
ER_SLAVE_IGNORED_TABLE
eng "Slave SQL thread ignored the query because of replicate-*-table rules"
ger "Slave-SQL-Thread hat die Abfrage aufgrund von replicate-*-table-Regeln ignoriert"
+ jpn "replicate-*-table ルールに従って、スレーブSQLスレッドはクエリを無視しました。"
nla "Slave SQL thread negeerde de query vanwege replicate-*-table opties"
por "Slave SQL thread ignorado a consulta devido às normas de replicação-*-tabela"
spa "Slave SQL thread ignorado el query debido a las reglas de replicación-*-tabla"
@@ -4720,12 +4733,14 @@ ER_INCORRECT_GLOBAL_LOCAL_VAR
eng "Variable '%-.192s' is a %s variable"
serbian "Promenljiva '%-.192s' je %s promenljiva"
ger "Variable '%-.192s' ist eine %s-Variable"
+ jpn "変数 '%-.192s' は %s 変数です。"
nla "Variabele '%-.192s' is geen %s variabele"
spa "Variable '%-.192s' es una %s variable"
swe "Variabel '%-.192s' är av typ %s"
ER_WRONG_FK_DEF 42000
eng "Incorrect foreign key definition for '%-.192s': %s"
ger "Falsche Fremdschlüssel-Definition für '%-.192s': %s"
+ jpn "外部キー '%-.192s' の定義の不正: %s"
nla "Incorrecte foreign key definitie voor '%-.192s': %s"
por "Definição errada da chave estrangeira para '%-.192s': %s"
spa "Equivocada definición de llave extranjera para '%-.192s': %s"
@@ -4733,6 +4748,7 @@ ER_WRONG_FK_DEF 42000
ER_KEY_REF_DO_NOT_MATCH_TABLE_REF
eng "Key reference and table reference don't match"
ger "Schlüssel- und Tabellenverweis passen nicht zusammen"
+ jpn "外部キーの参照表と定義が一致しません。"
nla "Sleutel- en tabelreferentie komen niet overeen"
por "Referência da chave e referência da tabela não coincidem"
spa "Referencia de llave y referencia de tabla no coinciden"
@@ -4740,6 +4756,7 @@ ER_KEY_REF_DO_NOT_MATCH_TABLE_REF
ER_OPERAND_COLUMNS 21000
eng "Operand should contain %d column(s)"
ger "Operand sollte %d Spalte(n) enthalten"
+ jpn "オペランドに %d 個の列が必要です。"
nla "Operand behoort %d kolommen te bevatten"
rus "Операнд должен содержать %d колонок"
spa "Operando debe tener %d columna(s)"
@@ -4747,6 +4764,7 @@ ER_OPERAND_COLUMNS 21000
ER_SUBQUERY_NO_1_ROW 21000
eng "Subquery returns more than 1 row"
ger "Unterabfrage lieferte mehr als einen Datensatz zurück"
+ jpn "サブクエリが2行以上の結果を返します。"
nla "Subquery retourneert meer dan 1 rij"
por "Subconsulta retorna mais que 1 registro"
rus "Подзапрос возвращает более одной записи"
@@ -4757,6 +4775,7 @@ ER_UNKNOWN_STMT_HANDLER
dan "Unknown prepared statement handler (%.*s) given to %s"
eng "Unknown prepared statement handler (%.*s) given to %s"
ger "Unbekannter Prepared-Statement-Handler (%.*s) für %s angegeben"
+ jpn "'%.*s' はプリペアードステートメントの不明なハンドルです。(%s で指定されました)"
nla "Onebekende prepared statement handler (%.*s) voor %s aangegeven"
por "Desconhecido manipulador de declaração preparado (%.*s) determinado para %s"
spa "Desconocido preparado comando handler (%.*s) dado para %s"
@@ -4765,6 +4784,7 @@ ER_UNKNOWN_STMT_HANDLER
ER_CORRUPT_HELP_DB
eng "Help database is corrupt or does not exist"
ger "Die Hilfe-Datenbank ist beschädigt oder existiert nicht"
+ jpn "ヘルプデータベースは壊れているか存在しません。"
nla "Help database is beschadigd of bestaat niet"
por "Banco de dado de ajuda corrupto ou não existente"
spa "Base de datos Help está corrupto o no existe"
@@ -4772,6 +4792,7 @@ ER_CORRUPT_HELP_DB
ER_CYCLIC_REFERENCE
eng "Cyclic reference on subqueries"
ger "Zyklischer Verweis in Unterabfragen"
+ jpn "サブクエリの参照がループしています。"
nla "Cyclische verwijzing in subqueries"
por "Referência cíclica em subconsultas"
rus "Циклическая ссылка на подзапрос"
@@ -4781,6 +4802,7 @@ ER_CYCLIC_REFERENCE
ER_AUTO_CONVERT
eng "Converting column '%s' from %s to %s"
ger "Feld '%s' wird von %s nach %s umgewandelt"
+ jpn "列 '%s' を %s から %s へ変換します。"
nla "Veld '%s' wordt van %s naar %s geconverteerd"
por "Convertendo coluna '%s' de %s para %s"
rus "Преобразование поля '%s' из %s в %s"
@@ -4790,6 +4812,7 @@ ER_AUTO_CONVERT
ER_ILLEGAL_REFERENCE 42S22
eng "Reference '%-.64s' not supported (%s)"
ger "Verweis '%-.64s' wird nicht unterstützt (%s)"
+ jpn "'%-.64s' の参照はできません。(%s)"
nla "Verwijzing '%-.64s' niet ondersteund (%s)"
por "Referência '%-.64s' não suportada (%s)"
rus "Ссылка '%-.64s' не поддерживается (%s)"
@@ -4799,6 +4822,7 @@ ER_ILLEGAL_REFERENCE 42S22
ER_DERIVED_MUST_HAVE_ALIAS 42000
eng "Every derived table must have its own alias"
ger "Für jede abgeleitete Tabelle muss ein eigener Alias angegeben werden"
+ jpn "導出表には別名が必須です。"
nla "Voor elke afgeleide tabel moet een unieke alias worden gebruikt"
por "Cada tabela derivada deve ter seu próprio alias"
spa "Cada tabla derivada debe tener su propio alias"
@@ -4806,6 +4830,7 @@ ER_DERIVED_MUST_HAVE_ALIAS 42000
ER_SELECT_REDUCED 01000
eng "Select %u was reduced during optimization"
ger "Select %u wurde während der Optimierung reduziert"
+ jpn "Select %u は最適化によって減らされました。"
nla "Select %u werd geredureerd tijdens optimtalisatie"
por "Select %u foi reduzido durante otimização"
rus "Select %u был упразднен в процессе оптимизации"
@@ -4815,6 +4840,7 @@ ER_SELECT_REDUCED 01000
ER_TABLENAME_NOT_ALLOWED_HERE 42000
eng "Table '%-.192s' from one of the SELECTs cannot be used in %-.32s"
ger "Tabelle '%-.192s', die in einem der SELECT-Befehle verwendet wurde, kann nicht in %-.32s verwendet werden"
+ jpn "特定のSELECTのみで使用の表 '%-.192s' は %-.32s では使用できません。"
nla "Tabel '%-.192s' uit een van de SELECTS kan niet in %-.32s gebruikt worden"
por "Tabela '%-.192s' de um dos SELECTs não pode ser usada em %-.32s"
spa "Tabla '%-.192s' de uno de los SELECT no puede ser usada en %-.32s"
@@ -4822,6 +4848,7 @@ ER_TABLENAME_NOT_ALLOWED_HERE 42000
ER_NOT_SUPPORTED_AUTH_MODE 08004
eng "Client does not support authentication protocol requested by server; consider upgrading MariaDB client"
ger "Client unterstützt das vom Server erwartete Authentifizierungsprotokoll nicht. Bitte aktualisieren Sie Ihren MariaDB-Client"
+ jpn "クライアントはサーバーが要求する認証プロトコルに対応できません。MariaDBクライアントのアップグレードを検討してください。"
nla "Client ondersteunt het door de server verwachtte authenticatieprotocol niet. Overweeg een nieuwere MariaDB client te gebruiken"
por "Cliente não suporta o protocolo de autenticação exigido pelo servidor; considere a atualização do cliente MariaDB"
spa "Cliente no soporta protocolo de autenticación solicitado por el servidor; considere actualizar el cliente MariaDB"
@@ -4829,6 +4856,7 @@ ER_NOT_SUPPORTED_AUTH_MODE 08004
ER_SPATIAL_CANT_HAVE_NULL 42000
eng "All parts of a SPATIAL index must be NOT NULL"
ger "Alle Teile eines SPATIAL-Index müssen als NOT NULL deklariert sein"
+ jpn "空間索引のキー列は NOT NULL でなければいけません。"
nla "Alle delete van een SPATIAL index dienen als NOT NULL gedeclareerd te worden"
por "Todas as partes de uma SPATIAL index devem ser NOT NULL"
spa "Todas las partes de una SPATIAL index deben ser NOT NULL"
@@ -4836,6 +4864,7 @@ ER_SPATIAL_CANT_HAVE_NULL 42000
ER_COLLATION_CHARSET_MISMATCH 42000
eng "COLLATION '%s' is not valid for CHARACTER SET '%s'"
ger "COLLATION '%s' ist für CHARACTER SET '%s' ungültig"
+ jpn "COLLATION '%s' は CHARACTER SET '%s' に適用できません。"
nla "COLLATION '%s' is niet geldig voor CHARACTER SET '%s'"
por "COLLATION '%s' não é válida para CHARACTER SET '%s'"
spa "COLLATION '%s' no es válido para CHARACTER SET '%s'"
@@ -4843,6 +4872,7 @@ ER_COLLATION_CHARSET_MISMATCH 42000
ER_SLAVE_WAS_RUNNING
eng "Slave is already running"
ger "Slave läuft bereits"
+ jpn "スレーブはすでに稼働中です。"
nla "Slave is reeds actief"
por "O slave já está rodando"
spa "Slave ya está funcionando"
@@ -4850,6 +4880,7 @@ ER_SLAVE_WAS_RUNNING
ER_SLAVE_WAS_NOT_RUNNING
eng "Slave already has been stopped"
ger "Slave wurde bereits angehalten"
+ jpn "スレーブはすでに停止しています。"
nla "Slave is reeds gestopt"
por "O slave já está parado"
spa "Slave ya fué parado"
@@ -4857,24 +4888,28 @@ ER_SLAVE_WAS_NOT_RUNNING
ER_TOO_BIG_FOR_UNCOMPRESS
eng "Uncompressed data size too large; the maximum size is %d (probably, length of uncompressed data was corrupted)"
ger "Unkomprimierte Daten sind zu groß. Die maximale Größe beträgt %d (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+ jpn "展開後のデータが大きすぎます。最大サイズは %d です。(展開後データの長さ情報が壊れている可能性もあります。)"
nla "Ongecomprimeerder data is te groot; de maximum lengte is %d (waarschijnlijk, de lengte van de gecomprimeerde data was beschadigd)"
por "Tamanho muito grande dos dados des comprimidos. O máximo tamanho é %d. (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
spa "Tamaño demasiado grande para datos descomprimidos. El máximo tamaño es %d. (probablemente, extensión de datos descomprimidos fué corrompida)"
ER_ZLIB_Z_MEM_ERROR
eng "ZLIB: Not enough memory"
ger "ZLIB: Nicht genug Speicher"
+ jpn "ZLIB: メモリ不足です。"
nla "ZLIB: Onvoldoende geheugen"
por "ZLIB: Não suficiente memória disponível"
spa "Z_MEM_ERROR: No suficiente memoria para zlib"
ER_ZLIB_Z_BUF_ERROR
eng "ZLIB: Not enough room in the output buffer (probably, length of uncompressed data was corrupted)"
ger "ZLIB: Im Ausgabepuffer ist nicht genug Platz vorhanden (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+ jpn "ZLIB: 出力バッファに十分な空きがありません。(展開後データの長さ情報が壊れている可能性もあります。)"
nla "ZLIB: Onvoldoende ruimte in uitgaande buffer (waarschijnlijk, de lengte van de ongecomprimeerde data was beschadigd)"
por "ZLIB: Não suficiente espaço no buffer emissor (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
spa "Z_BUF_ERROR: No suficiente espacio en el búfer de salida para zlib (probablemente, extensión de datos descomprimidos fué corrompida)"
ER_ZLIB_Z_DATA_ERROR
eng "ZLIB: Input data corrupted"
ger "ZLIB: Eingabedaten beschädigt"
+ jpn "ZLIB: 入力データが壊れています。"
nla "ZLIB: Invoer data beschadigd"
por "ZLIB: Dados de entrada está corrupto"
spa "ZLIB: Dato de entrada fué corrompido para zlib"
@@ -4883,18 +4918,21 @@ ER_CUT_VALUE_GROUP_CONCAT
ER_WARN_TOO_FEW_RECORDS 01000
eng "Row %lu doesn't contain data for all columns"
ger "Zeile %lu enthält nicht für alle Felder Daten"
+ jpn "行 %lu はすべての列へのデータを含んでいません。"
nla "Rij %lu bevat niet de data voor alle kolommen"
por "Conta de registro é menor que a conta de coluna na linha %lu"
spa "Línea %lu no contiene datos para todas las columnas"
ER_WARN_TOO_MANY_RECORDS 01000
eng "Row %lu was truncated; it contained more data than there were input columns"
ger "Zeile %lu gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
+ jpn "行 %lu はデータを切り捨てられました。列よりも多いデータを含んでいました。"
nla "Regel %lu ingekort, bevatte meer data dan invoer kolommen"
por "Conta de registro é maior que a conta de coluna na linha %lu"
spa "Línea %lu fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
ER_WARN_NULL_TO_NOTNULL 22004
eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %lu"
ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %lu angegeben"
+ jpn "列にデフォルト値が設定されました。NOT NULLの列 '%s' に 行 %lu で NULL が与えられました。"
por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %lu"
spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %lu"
ER_WARN_DATA_OUT_OF_RANGE 22003
@@ -4902,17 +4940,20 @@ ER_WARN_DATA_OUT_OF_RANGE 22003
WARN_DATA_TRUNCATED 01000
eng "Data truncated for column '%s' at row %lu"
ger "Daten abgeschnitten für Feld '%s' in Zeile %lu"
+ jpn "列 '%s' の 行 %lu でデータが切り捨てられました。"
por "Dado truncado para coluna '%s' na linha %lu"
spa "Datos truncados para columna '%s' en la línea %lu"
ER_WARN_USING_OTHER_HANDLER
eng "Using storage engine %s for table '%s'"
ger "Für Tabelle '%s' wird Speicher-Engine %s benutzt"
+ jpn "ストレージエンジン %s が表 '%s' に利用されています。"
por "Usando engine de armazenamento %s para tabela '%s'"
spa "Usando motor de almacenamiento %s para tabla '%s'"
swe "Använder handler %s för tabell '%s'"
ER_CANT_AGGREGATE_2COLLATIONS
eng "Illegal mix of collations (%s,%s) and (%s,%s) for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s) und (%s, %s) für Operation '%s'"
+ jpn "照合順序 (%s,%s) と (%s,%s) の混在は操作 '%s' では不正です。"
por "Combinação ilegal de collations (%s,%s) e (%s,%s) para operação '%s'"
spa "Ilegal mezcla de collations (%s,%s) y (%s,%s) para operación '%s'"
ER_DROP_USER
@@ -4921,42 +4962,50 @@ ER_DROP_USER
ER_REVOKE_GRANTS
eng "Can't revoke all privileges for one or more of the requested users"
ger "Kann nicht alle Berechtigungen widerrufen, die für einen oder mehrere Benutzer gewährt wurden"
+ jpn "指定されたユーザーから指定された全ての権限を剥奪することができませんでした。"
por "Não pode revocar todos os privilégios, grant para um ou mais dos usuários pedidos"
spa "No puede revocar todos los privilegios, derecho para uno o mas de los usuarios solicitados"
ER_CANT_AGGREGATE_3COLLATIONS
eng "Illegal mix of collations (%s,%s), (%s,%s), (%s,%s) for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s), (%s, %s), (%s, %s) für Operation '%s'"
+ jpn "照合順序 (%s,%s), (%s,%s), (%s,%s) の混在は操作 '%s' では不正です。"
por "Ilegal combinação de collations (%s,%s), (%s,%s), (%s,%s) para operação '%s'"
spa "Ilegal mezcla de collations (%s,%s), (%s,%s), (%s,%s) para operación '%s'"
ER_CANT_AGGREGATE_NCOLLATIONS
eng "Illegal mix of collations for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen für Operation '%s'"
+ jpn "操作 '%s' では不正な照合順序の混在です。"
por "Ilegal combinação de collations para operação '%s'"
spa "Ilegal mezcla de collations para operación '%s'"
ER_VARIABLE_IS_NOT_STRUCT
eng "Variable '%-.64s' is not a variable component (can't be used as XXXX.variable_name)"
ger "Variable '%-.64s' ist keine Variablen-Komponente (kann nicht als XXXX.variablen_name verwendet werden)"
+ jpn "変数 '%-.64s' は構造変数の構成要素ではありません。(XXXX.変数名 という指定はできません。)"
por "Variável '%-.64s' não é uma variável componente (Não pode ser usada como XXXX.variável_nome)"
spa "Variable '%-.64s' no es una variable componente (No puede ser usada como XXXX.variable_name)"
ER_UNKNOWN_COLLATION
eng "Unknown collation: '%-.64s'"
ger "Unbekannte Sortierreihenfolge: '%-.64s'"
+ jpn "不明な照合順序: '%-.64s'"
por "Collation desconhecida: '%-.64s'"
spa "Collation desconocida: '%-.64s'"
ER_SLAVE_IGNORED_SSL_PARAMS
eng "SSL parameters in CHANGE MASTER are ignored because this MariaDB slave was compiled without SSL support; they can be used later if MariaDB slave with SSL is started"
ger "SSL-Parameter in CHANGE MASTER werden ignoriert, weil dieser MariaDB-Slave ohne SSL-Unterstützung kompiliert wurde. Sie können aber später verwendet werden, wenn ein MariaDB-Slave mit SSL gestartet wird"
+ jpn "このMariaDBスレーブはSSLサポートを含めてコンパイルされていないので、CHANGE MASTER のSSLパラメータは無視されました。今後SSLサポートを持つMariaDBスレーブを起動する際に利用されます。"
por "SSL parâmetros em CHANGE MASTER são ignorados porque este escravo MariaDB foi compilado sem o SSL suporte. Os mesmos podem ser usados mais tarde quando o escravo MariaDB com SSL seja iniciado."
spa "Parametros SSL en CHANGE MASTER son ignorados porque este slave MariaDB fue compilado sin soporte SSL; pueden ser usados despues cuando el slave MariaDB con SSL sea inicializado"
ER_SERVER_IS_IN_SECURE_AUTH_MODE
eng "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format"
ger "Server läuft im Modus --secure-auth, aber '%s'@'%s' hat ein Passwort im alten Format. Bitte Passwort ins neue Format ändern"
+ jpn "サーバーは --secure-auth モードで稼働しています。しかし '%s'@'%s' は古い形式のパスワードを使用しています。新しい形式のパスワードに変更してください。"
por "Servidor está rodando em --secure-auth modo, porêm '%s'@'%s' tem senha no formato antigo; por favor troque a senha para o novo formato"
rus "Сервер запущен в режиме --secure-auth (безопасной авторизации), но для пользователя '%s'@'%s' пароль сохранён в старом формате; необходимо обновить формат пароля"
spa "Servidor está rodando en modo --secure-auth, pero '%s'@'%s' tiene clave en el antiguo formato; por favor cambie la clave para el nuevo formato"
ER_WARN_FIELD_RESOLVED
eng "Field or reference '%-.192s%s%-.192s%s%-.192s' of SELECT #%d was resolved in SELECT #%d"
ger "Feld oder Verweis '%-.192s%s%-.192s%s%-.192s' im SELECT-Befehl Nr. %d wurde im SELECT-Befehl Nr. %d aufgelöst"
+ jpn "フィールドまたは参照 '%-.192s%s%-.192s%s%-.192s' は SELECT #%d ではなく、SELECT #%d で解決されました。"
por "Campo ou referência '%-.192s%s%-.192s%s%-.192s' de SELECT #%d foi resolvido em SELECT #%d"
rus "Поле или ссылка '%-.192s%s%-.192s%s%-.192s' из SELECTа #%d была найдена в SELECTе #%d"
spa "Campo o referencia '%-.192s%s%-.192s%s%-.192s' de SELECT #%d fue resolvido en SELECT #%d"
@@ -4964,68 +5013,80 @@ ER_WARN_FIELD_RESOLVED
ER_BAD_SLAVE_UNTIL_COND
eng "Incorrect parameter or combination of parameters for START SLAVE UNTIL"
ger "Falscher Parameter oder falsche Kombination von Parametern für START SLAVE UNTIL"
+ jpn "START SLAVE UNTIL へのパラメータまたはその組み合わせが不正です。"
por "Parâmetro ou combinação de parâmetros errado para START SLAVE UNTIL"
spa "Parametro equivocado o combinación de parametros para START SLAVE UNTIL"
ER_MISSING_SKIP_SLAVE
eng "It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart"
ger "Es wird empfohlen, mit --skip-slave-start zu starten, wenn mit START SLAVE UNTIL eine Schritt-für-Schritt-Replikation ausgeführt wird. Ansonsten gibt es Probleme, wenn ein Slave-Server unerwartet neu startet"
+ jpn "START SLAVE UNTIL で段階的にレプリケーションを行う際には、--skip-slave-start オプションを使うことを推奨します。使わない場合、スレーブのmysqldが不慮の再起動をすると問題が発生します。"
        por "É recomendado para rodar com --skip-slave-start quando fazendo replicação passo-por-passo com START SLAVE UNTIL, de outra forma você não está seguro em caso de inesperada reinicialização do mysqld escravo"
spa "Es recomendado rodar con --skip-slave-start cuando haciendo replicación step-by-step con START SLAVE UNTIL, a menos que usted no esté seguro en caso de inesperada reinicialización del mysqld slave"
ER_UNTIL_COND_IGNORED
eng "SQL thread is not to be started so UNTIL options are ignored"
ger "SQL-Thread soll nicht gestartet werden. Daher werden UNTIL-Optionen ignoriert"
+ jpn "スレーブSQLスレッドが開始されないため、UNTILオプションは無視されました。"
por "Thread SQL não pode ser inicializado tal que opções UNTIL são ignoradas"
spa "SQL thread no es inicializado tal que opciones UNTIL son ignoradas"
ER_WRONG_NAME_FOR_INDEX 42000
eng "Incorrect index name '%-.100s'"
ger "Falscher Indexname '%-.100s'"
+ jpn "索引名 '%-.100s' は不正です。"
por "Incorreto nome de índice '%-.100s'"
spa "Nombre de índice incorrecto '%-.100s'"
swe "Felaktigt index namn '%-.100s'"
ER_WRONG_NAME_FOR_CATALOG 42000
eng "Incorrect catalog name '%-.100s'"
ger "Falscher Katalogname '%-.100s'"
+ jpn "カタログ名 '%-.100s' は不正です。"
por "Incorreto nome de catálogo '%-.100s'"
spa "Nombre de catalog incorrecto '%-.100s'"
swe "Felaktigt katalog namn '%-.100s'"
ER_WARN_QC_RESIZE
eng "Query cache failed to set size %lu; new query cache size is %lu"
ger "Änderung der Query-Cache-Größe auf %lu fehlgeschlagen; neue Query-Cache-Größe ist %lu"
+ jpn "クエリキャッシュのサイズを %lu にできませんでした。サイズは %lu になりました。"
por "Falha em Query cache para configurar tamanho %lu, novo tamanho de query cache é %lu"
rus "Кеш запросов не может установить размер %lu, новый размер кеша зпросов - %lu"
spa "Query cache fallada para configurar tamaño %lu, nuevo tamaño de query cache es %lu"
- swe "Storleken av "Query cache" kunde inte sättas till %lu, ny storlek är %lu"
+ swe "Storleken av 'Query cache' kunde inte sättas till %lu, ny storlek är %lu"
ukr "Кеш запитів неспроможен встановити розмір %lu, новий розмір кеша запитів - %lu"
ER_BAD_FT_COLUMN
eng "Column '%-.192s' cannot be part of FULLTEXT index"
ger "Feld '%-.192s' kann nicht Teil eines FULLTEXT-Index sein"
+ jpn "列 '%-.192s' は全文索引のキーにはできません。"
por "Coluna '%-.192s' não pode ser parte de índice FULLTEXT"
spa "Columna '%-.192s' no puede ser parte de FULLTEXT index"
swe "Kolumn '%-.192s' kan inte vara del av ett FULLTEXT index"
ER_UNKNOWN_KEY_CACHE
eng "Unknown key cache '%-.100s'"
ger "Unbekannter Schlüssel-Cache '%-.100s'"
+ jpn "'%-.100s' は不明なキーキャッシュです。"
por "Key cache desconhecida '%-.100s'"
spa "Desconocida key cache '%-.100s'"
swe "Okänd nyckel cache '%-.100s'"
ER_WARN_HOSTNAME_WONT_WORK
eng "MariaDB is started in --skip-name-resolve mode; you must restart it without this switch for this grant to work"
ger "MariaDB wurde mit --skip-name-resolve gestartet. Diese Option darf nicht verwendet werden, damit diese Rechtevergabe möglich ist"
+ jpn "MariaDBは --skip-name-resolve モードで起動しています。このオプションを外して再起動しなければ、この権限操作は機能しません。"
por "MariaDB foi inicializado em modo --skip-name-resolve. Você necesita reincializá-lo sem esta opção para este grant funcionar"
spa "MariaDB esta inicializado en modo --skip-name-resolve. Usted necesita reinicializarlo sin esta opción para este derecho funcionar"
ER_UNKNOWN_STORAGE_ENGINE 42000
eng "Unknown storage engine '%s'"
ger "Unbekannte Speicher-Engine '%s'"
+ jpn "'%s' は不明なストレージエンジンです。"
por "Motor de tabela desconhecido '%s'"
spa "Desconocido motor de tabla '%s'"
ER_WARN_DEPRECATED_SYNTAX
eng "'%s' is deprecated and will be removed in a future release. Please use %s instead"
ger "'%s' ist veraltet. Bitte benutzen Sie '%s'"
+ jpn "'%s' は将来のリリースで廃止予定です。代わりに %s を使用してください。"
por "'%s' é desatualizado. Use '%s' em seu lugar"
spa "'%s' está desaprobado, use '%s' en su lugar"
ER_NON_UPDATABLE_TABLE
eng "The target table %-.100s of the %s is not updatable"
ger "Die Zieltabelle %-.100s von %s ist nicht aktualisierbar"
+ jpn "対象表 %-.100s は更新可能ではないので、%s を行えません。"
por "A tabela destino %-.100s do %s não é atualizável"
rus "Таблица %-.100s в %s не может изменятся"
spa "La tabla destino %-.100s del %s no es actualizable"
@@ -5034,33 +5095,39 @@ ER_NON_UPDATABLE_TABLE
ER_FEATURE_DISABLED
eng "The '%s' feature is disabled; you need MariaDB built with '%s' to have it working"
ger "Das Feature '%s' ist ausgeschaltet, Sie müssen MariaDB mit '%s' übersetzen, damit es verfügbar ist"
+ jpn "機能 '%s' は無効です。利用するためには '%s' を含めてビルドしたMariaDBが必要です。"
por "O recurso '%s' foi desativado; você necessita MariaDB construído com '%s' para ter isto funcionando"
spa "El recurso '%s' fue deshabilitado; usted necesita construir MariaDB con '%s' para tener eso funcionando"
swe "'%s' är inte aktiverad; För att aktivera detta måste du bygga om MariaDB med '%s' definierad"
ER_OPTION_PREVENTS_STATEMENT
eng "The MariaDB server is running with the %s option so it cannot execute this statement"
ger "Der MariaDB-Server läuft mit der Option %s und kann diese Anweisung deswegen nicht ausführen"
+ jpn "MariaDBサーバーが %s オプションで実行されているので、このステートメントは実行できません。"
por "O servidor MariaDB está rodando com a opção %s razão pela qual não pode executar esse commando"
spa "El servidor MariaDB está rodando con la opción %s tal que no puede ejecutar este comando"
swe "MariaDB är startad med %s. Pga av detta kan du inte använda detta kommando"
ER_DUPLICATED_VALUE_IN_TYPE
eng "Column '%-.100s' has duplicated value '%-.64s' in %s"
ger "Feld '%-.100s' hat doppelten Wert '%-.64s' in %s"
+ jpn "列 '%-.100s' で、重複する値 '%-.64s' が %s に指定されています。"
por "Coluna '%-.100s' tem valor duplicado '%-.64s' em %s"
spa "Columna '%-.100s' tiene valor doblado '%-.64s' en %s"
ER_TRUNCATED_WRONG_VALUE 22007
eng "Truncated incorrect %-.32s value: '%-.128s'"
ger "Falscher %-.32s-Wert gekürzt: '%-.128s'"
+ jpn "不正な %-.32s の値が切り捨てられました。: '%-.128s'"
por "Truncado errado %-.32s valor: '%-.128s'"
spa "Equivocado truncado %-.32s valor: '%-.128s'"
ER_TOO_MUCH_AUTO_TIMESTAMP_COLS
eng "Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause"
ger "Fehlerhafte Tabellendefinition. Es kann nur eine einzige TIMESTAMP-Spalte mit CURRENT_TIMESTAMP als DEFAULT oder in einer ON-UPDATE-Klausel geben"
+ jpn "不正な表定義です。DEFAULT句またはON UPDATE句に CURRENT_TIMESTAMP をともなうTIMESTAMP型の列は1つまでです。"
por "Incorreta definição de tabela; Pode ter somente uma coluna TIMESTAMP com CURRENT_TIMESTAMP em DEFAULT ou ON UPDATE cláusula"
spa "Incorrecta definición de tabla; Solamente debe haber una columna TIMESTAMP con CURRENT_TIMESTAMP en DEFAULT o ON UPDATE cláusula"
ER_INVALID_ON_UPDATE
eng "Invalid ON UPDATE clause for '%-.192s' column"
ger "Ungültige ON-UPDATE-Klausel für Spalte '%-.192s'"
+ jpn "列 '%-.192s' に ON UPDATE句は無効です。"
por "Inválida cláusula ON UPDATE para campo '%-.192s'"
spa "Inválido ON UPDATE cláusula para campo '%-.192s'"
ER_UNSUPPORTED_PS
@@ -5070,11 +5137,13 @@ ER_GET_ERRMSG
dan "Modtog fejl %d '%-.100s' fra %s"
eng "Got error %d '%-.100s' from %s"
ger "Fehler %d '%-.100s' von %s"
+ jpn "エラー %d '%-.100s' が %s から返されました。"
        nor "Mottok feil %d '%-.100s' fra %s"
norwegian-ny "Mottok feil %d '%-.100s' fra %s"
ER_GET_TEMPORARY_ERRMSG
dan "Modtog temporary fejl %d '%-.100s' fra %s"
eng "Got temporary error %d '%-.100s' from %s"
+ jpn "一時エラー %d '%-.100s' が %s から返されました。"
ger "Temporärer Fehler %d '%-.100s' von %s"
nor "Mottok temporary feil %d '%-.100s' fra %s"
norwegian-ny "Mottok temporary feil %d '%-.100s' fra %s"
@@ -5540,6 +5609,7 @@ ER_TRG_IN_WRONG_SCHEMA
ER_STACK_OVERRUN_NEED_MORE
eng "Thread stack overrun: %ld bytes used of a %ld byte stack, and %ld bytes needed. Use 'mysqld --thread_stack=#' to specify a bigger stack."
ger "Thread-Stack-Überlauf: %ld Bytes eines %ld-Byte-Stacks in Verwendung, und %ld Bytes benötigt. Verwenden Sie 'mysqld --thread_stack=#', um einen größeren Stack anzugeben"
+ jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld ; 要求: %ld)。より大きい値で 'mysqld --thread_stack=#' の指定をしてください。"
ER_TOO_LONG_BODY 42000 S1009
eng "Routine body for '%-.100s' is too long"
ger "Routinen-Body für '%-.100s' ist zu lang"
@@ -5645,6 +5715,7 @@ ER_WRONG_STRING_LENGTH
ER_NON_INSERTABLE_TABLE
eng "The target table %-.100s of the %s is not insertable-into"
ger "Die Zieltabelle %-.100s von %s ist nicht einfügbar"
+ jpn "対象表 %-.100s は挿入可能ではないので、%s を行えません。"
ER_ADMIN_WRONG_MRG_TABLE
eng "Table '%-.64s' is differently defined or of non-MyISAM type or doesn't exist"
ger "Tabelle '%-.64s' ist unterschiedlich definiert, nicht vom Typ MyISAM oder existiert nicht"
@@ -6041,29 +6112,28 @@ ER_NATIVE_FCT_NAME_COLLISION
# When using this error message, use the ER_DUP_ENTRY error code. See, for
# example, code in handler.cc.
ER_DUP_ENTRY_WITH_KEY_NAME 23000 S1009
- cze "Zvojen-Bý klíč '%-.64s' (číslo klíče '%-.192s')"
+ cze "Zvojený klíč '%-.64s' (číslo klíče '%-.192s')"
dan "Ens værdier '%-.64s' for indeks '%-.192s'"
nla "Dubbele ingang '%-.64s' voor zoeksleutel '%-.192s'"
eng "Duplicate entry '%-.64s' for key '%-.192s'"
- jps "'%-.64s' は key '%-.192s' において重複しています",
est "Kattuv väärtus '%-.64s' võtmele '%-.192s'"
fre "Duplicata du champ '%-.64s' pour la clef '%-.192s'"
ger "Doppelter Eintrag '%-.64s' für Schlüssel '%-.192s'"
greek "Διπλή εγγραφή '%-.64s' για το κλειδί '%-.192s'"
hun "Duplikalt bejegyzes '%-.64s' a '%-.192s' kulcs szerint."
ita "Valore duplicato '%-.64s' per la chiave '%-.192s'"
- jpn "'%-.64s' は key '%-.192s' において重複しています"
+ jpn "'%-.64s' は索引 '%-.192s' で重複しています。"
kor "중복된 입력 값 '%-.64s': key '%-.192s'"
nor "Like verdier '%-.64s' for nøkkel '%-.192s'"
norwegian-ny "Like verdiar '%-.64s' for nykkel '%-.192s'"
- pol "Powtórzone wyst?pienie '%-.64s' dla klucza '%-.192s'"
+ pol "Powtórzone wystąpienie '%-.64s' dla klucza '%-.192s'"
por "Entrada '%-.64s' duplicada para a chave '%-.192s'"
rum "Cimpul '%-.64s' e duplicat pentru cheia '%-.192s'"
rus "Дублирующаяся запись '%-.64s' по ключу '%-.192s'"
serbian "Dupliran unos '%-.64s' za ključ '%-.192s'"
slo "Opakovaný kľúč '%-.64s' (číslo kľúča '%-.192s')"
spa "Entrada duplicada '%-.64s' para la clave '%-.192s'"
- swe "Dubbel nyckel '%-.64s' för nyckel '%-.192s'"
+ swe "Dublett '%-.64s' för nyckel '%-.192s'"
ukr "Дублюючий запис '%-.64s' для ключа '%-.192s'"
ER_BINLOG_PURGE_EMFILE
eng "Too many files opened, please execute the command again"
@@ -6394,7 +6464,7 @@ ER_VALUES_IS_NOT_INT_TYPE_ERROR
swe "Värden i VALUES för partition '%-.64s' måste ha typen INT"
ER_ACCESS_DENIED_NO_PASSWORD_ERROR 28000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s'"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s'"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s'"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s'"
eng "Access denied for user '%-.48s'@'%-.64s'"
@@ -6404,6 +6474,7 @@ ER_ACCESS_DENIED_NO_PASSWORD_ERROR 28000
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s'"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres."
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s'"
+ jpn "ユーザー '%-.48s'@'%-.64s' のアクセスは拒否されました。"
kor "'%-.48s'@'%-.64s' 사용자는 접근이 거부 되었습니다."
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s'"
norwegian-ny "Tilgang ikke tillate for brukar: '%-.48s'@'%-.64s'"
@@ -6487,7 +6558,6 @@ ER_PLUGIN_NO_UNINSTALL
ER_PLUGIN_NO_INSTALL
eng "Plugin '%s' is marked as not dynamically installable. You have to stop the server to install it."
-
ER_BINLOG_UNSAFE_WRITE_AUTOINC_SELECT
eng "Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave."
@@ -6500,8 +6570,8 @@ ER_BINLOG_UNSAFE_INSERT_TWO_KEYS
ER_TABLE_IN_FK_CHECK
eng "Table is being used in foreign key check."
-ER_UNUSED_1
- eng "You should never see it"
+ER_UNSUPPORTED_ENGINE
+ eng "Storage engine '%s' does not support system tables. [%s.%s]"
ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST
eng "INSERT into autoincrement field which is not the first part in the composed primary key is unsafe."
@@ -6510,15 +6580,10 @@ ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST
# End of 5.5 error messages.
#
-ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2
- eng "Column count of %s.%s is wrong. Expected %d, found %d. The table is probably corrupted"
- ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d gefunden. Tabelle ist wahrscheinlich beschädigt"
-
ER_CANNOT_LOAD_FROM_TABLE_V2
eng "Cannot load from %s.%s. The table is probably corrupted"
ger "Kann %s.%s nicht einlesen. Tabelle ist wahrscheinlich beschädigt"
-
ER_MASTER_DELAY_VALUE_OUT_OF_RANGE
eng "The requested value %u for the master delay exceeds the maximum %u"
ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT
@@ -6573,7 +6638,10 @@ ER_PARTITION_CLAUSE_ON_NONPARTITIONED
ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET
eng "Found a row not matching the given partition set"
swe "Hittade en rad som inte passar i någon given partition"
-ER_NO_SUCH_PARTITION
+
+# Not used any longer, use ER_UNKNOWN_PARTITION which includes the tablename.
+# was ER_NO_SUCH_PARTITION
+ER_NO_SUCH_PARTITION__UNUSED
        cze "partition '%-.64s' neexistuje"
dan "partition '%-.64s' eksisterer ikke"
nla "partition '%-.64s' bestaat niet"
@@ -6583,6 +6651,7 @@ ER_NO_SUCH_PARTITION
ger "Die partition '%-.64s' existiert nicht"
hun "A '%-.64s' partition nem letezik"
ita "La tabella particione '%-.64s' non esiste"
+ jpn "パーティション '%-.64s' は存在しません。"
nor "Partition '%-.64s' doesn't exist"
norwegian-ny "Partition '%-.64s' doesn't exist"
pol "Partition '%-.64s' doesn't exist"
@@ -6633,18 +6702,13 @@ ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO 23000 S1009
eng "Foreign key constraint for table '%.192s', record '%-.192s' would lead to a duplicate entry in a child table"
ger "Fremdschlüssel-Beschränkung für Tabelle '%.192s', Datensatz '%-.192s' würde zu einem doppelten Eintrag in einer Kind-Tabelle führen"
swe "FOREIGN KEY constraint för tabell '%.192s', posten '%-.192s' kan inte uppdatera en barntabell på grund av UNIQUE-test"
+
ER_SQLTHREAD_WITH_SECURE_SLAVE
eng "Setting authentication options is not possible when only the Slave SQL Thread is being started."
ER_TABLE_HAS_NO_FT
eng "The table does not have FULLTEXT index to support this query"
-ER_INNODB_FT_LIMIT
- eng "InnoDB presently supports one FULLTEXT index per table"
-
-ER_INNODB_NO_FT_TEMP_TABLE
- eng "Cannot create FULLTEXT index on temporary InnoDB table"
-
ER_VARIABLE_NOT_SETTABLE_IN_SF_OR_TRIGGER
eng "The system variable %.200s cannot be set in stored functions or triggers."
@@ -6682,13 +6746,13 @@ ER_BAD_SLAVE_AUTO_POSITION
eng "Parameters MASTER_LOG_FILE, MASTER_LOG_POS, RELAY_LOG_FILE and RELAY_LOG_POS cannot be set when MASTER_AUTO_POSITION is active."
ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON
- eng "CHANGE MASTER TO AUTO_POSITION = 1 can only be executed when GTID_MODE = ON."
+ eng "CHANGE MASTER TO MASTER_AUTO_POSITION = 1 can only be executed when GTID_MODE = ON."
ER_CANT_DO_IMPLICIT_COMMIT_IN_TRX_WHEN_GTID_NEXT_IS_SET
eng "Cannot execute statements with implicit commit inside a transaction when GTID_NEXT != AUTOMATIC or GTID_NEXT_LIST != NULL."
-ER_GTID_MODE_2_OR_3_REQUIRES_DISABLE_GTID_UNSAFE_STATEMENTS_ON
- eng "GTID_MODE = ON or GTID_MODE = UPGRADE_STEP_2 requires DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ER_GTID_MODE_2_OR_3_REQUIRES_ENFORCE_GTID_CONSISTENCY_ON
+ eng "GTID_MODE = ON or GTID_MODE = UPGRADE_STEP_2 requires ENFORCE_GTID_CONSISTENCY = 1."
ER_GTID_MODE_REQUIRES_BINLOG
eng "GTID_MODE = ON or UPGRADE_STEP_1 or UPGRADE_STEP_2 requires --log-bin and --log-slave-updates."
@@ -6706,13 +6770,13 @@ ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF
eng "Found a Gtid_log_event or Previous_gtids_log_event when GTID_MODE = OFF."
ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE
- eng "Updates to non-transactional tables are forbidden when DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ eng "When ENFORCE_GTID_CONSISTENCY = 1, updates to non-transactional tables can only be done in either autocommitted statements or single-statement transactions, and never in the same statement as updates to transactional tables."
ER_GTID_UNSAFE_CREATE_SELECT
- eng "CREATE TABLE ... SELECT is forbidden when DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ eng "CREATE TABLE ... SELECT is forbidden when ENFORCE_GTID_CONSISTENCY = 1."
ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION
- eng "When DISABLE_GTID_UNSAFE_STATEMENTS = 1, the statements CREATE TEMPORARY TABLE and DROP TEMPORARY TABLE can be executed in a non-transactional context only, and require that AUTOCOMMIT = 1."
+ eng "When ENFORCE_GTID_CONSISTENCY = 1, the statements CREATE TEMPORARY TABLE and DROP TEMPORARY TABLE can be executed in a non-transactional context only, and require that AUTOCOMMIT = 1."
ER_GTID_MODE_CAN_ONLY_CHANGE_ONE_STEP_AT_A_TIME
eng "The value of GTID_MODE can only change one step at a time: OFF <-> UPGRADE_STEP_1 <-> UPGRADE_STEP_2 <-> ON. Also note that this value must be stepped up or down simultaneously on all servers; see the Manual for instructions."
@@ -6730,6 +6794,241 @@ ER_UNKNOWN_EXPLAIN_FORMAT
ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION 25006
eng "Cannot execute statement in a READ ONLY transaction."
+ER_TOO_LONG_TABLE_PARTITION_COMMENT
+ eng "Comment for table partition '%-.64s' is too long (max = %lu)"
+
+ER_SLAVE_CONFIGURATION
+ eng "Slave is not configured or failed to initialize properly. You must at least set --server-id to enable either a master or a slave. Additional error messages can be found in the MySQL error log."
+
+ER_INNODB_FT_LIMIT
+ eng "InnoDB presently supports one FULLTEXT index creation at a time"
+
+ER_INNODB_NO_FT_TEMP_TABLE
+ eng "Cannot create FULLTEXT index on temporary InnoDB table"
+
+ER_INNODB_FT_WRONG_DOCID_COLUMN
+ eng "Column '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+
+ER_INNODB_FT_WRONG_DOCID_INDEX
+ eng "Index '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+
+ER_INNODB_ONLINE_LOG_TOO_BIG
+ eng "Creating index '%-.192s' required more than 'innodb_online_alter_log_max_size' bytes of modification log. Please try again."
+
+ER_UNKNOWN_ALTER_ALGORITHM
+ eng "Unknown ALGORITHM '%s'"
+
+ER_UNKNOWN_ALTER_LOCK
+ eng "Unknown LOCK type '%s'"
+
+ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS
+ eng "CHANGE MASTER cannot be executed when the slave was stopped with an error or killed in MTS mode. Consider using RESET SLAVE or START SLAVE UNTIL."
+
+ER_MTS_RECOVERY_FAILURE
+ eng "Cannot recover after SLAVE errored out in parallel execution mode. Additional error messages can be found in the MySQL error log."
+
+ER_MTS_RESET_WORKERS
+ eng "Cannot clean up worker info tables. Additional error messages can be found in the MySQL error log."
+
+ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2
+ eng "Column count of %s.%s is wrong. Expected %d, found %d. The table is probably corrupted"
+ ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d gefunden. Tabelle ist wahrscheinlich beschädigt"
+
+ER_SLAVE_SILENT_RETRY_TRANSACTION
+ eng "Slave must silently retry current transaction"
+
+ER_DISCARD_FK_CHECKS_RUNNING
+ eng "There is a foreign key check running on table '%-.192s'. Cannot discard the table."
+
+ER_TABLE_SCHEMA_MISMATCH
+ eng "Schema mismatch (%s)"
+
+ER_TABLE_IN_SYSTEM_TABLESPACE
+ eng "Table '%-.192s' in system tablespace"
+
+ER_IO_READ_ERROR
+ eng "IO Read error: (%lu, %s) %s"
+
+ER_IO_WRITE_ERROR
+ eng "IO Write error: (%lu, %s) %s"
+
+ER_TABLESPACE_MISSING
+ eng "Tablespace is missing for table '%-.192s'"
+
+ER_TABLESPACE_EXISTS
+ eng "Tablespace for table '%-.192s' exists. Please DISCARD the tablespace before IMPORT."
+
+ER_TABLESPACE_DISCARDED
+ eng "Tablespace has been discarded for table '%-.192s'"
+
+ER_INTERNAL_ERROR
+ eng "Internal error: '%-.192s'"
+
+ER_INNODB_IMPORT_ERROR
+ eng "ALTER TABLE '%-.192s' IMPORT TABLESPACE failed with error %lu : '%s'"
+
+ER_INNODB_INDEX_CORRUPT
+ eng "Index corrupt: %s"
+
+ER_INVALID_YEAR_COLUMN_LENGTH
+ eng "YEAR(%lu) column type is deprecated. Creating YEAR(4) column instead."
+ rus "Тип YEAR(%lu) более не поддерживается, вместо него будет создана колонка с типом YEAR(4)."
+
+ER_NOT_VALID_PASSWORD
+ eng "Your password does not satisfy the current policy requirements"
+
+ER_MUST_CHANGE_PASSWORD
+ eng "You must SET PASSWORD before executing this statement"
+ bgn "Трябва първо да си смените паролата със SET PASSWORD за да можете да изпълните тази команда"
+
+ER_FK_NO_INDEX_CHILD
+ eng "Failed to add the foreign key constaint. Missing index for constraint '%s' in the foreign table '%s'"
+
+ER_FK_NO_INDEX_PARENT
+ eng "Failed to add the foreign key constaint. Missing index for constraint '%s' in the referenced table '%s'"
+
+ER_FK_FAIL_ADD_SYSTEM
+ eng "Failed to add the foreign key constraint '%s' to system tables"
+
+ER_FK_CANNOT_OPEN_PARENT
+ eng "Failed to open the referenced table '%s'"
+
+ER_FK_INCORRECT_OPTION
+ eng "Failed to add the foreign key constraint on table '%s'. Incorrect options in FOREIGN KEY constraint '%s'"
+
+ER_FK_DUP_NAME
+ eng "Duplicate foreign key constraint name '%s'"
+
+ER_PASSWORD_FORMAT
+ eng "The password hash doesn't have the expected format. Check if the correct password algorithm is being used with the PASSWORD() function."
+
+ER_FK_COLUMN_CANNOT_DROP
+ eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s'"
+ ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' benötigt"
+
+ER_FK_COLUMN_CANNOT_DROP_CHILD
+ eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s' of table '%-.192s'"
+ ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' der Tabelle '%-.192s' benötigt"
+
+ER_FK_COLUMN_NOT_NULL
+ eng "Column '%-.192s' cannot be NOT NULL: needed in a foreign key constraint '%-.192s' SET NULL"
+ ger "Spalte '%-.192s' kann nicht NOT NULL sein: wird für eine Fremdschlüsselbeschränkung '%-.192s' SET NULL benötigt"
+
+ER_DUP_INDEX
+ eng "Duplicate index '%-.64s' defined on the table '%-.64s.%-.64s'. This is deprecated and will be disallowed in a future release."
+
+ER_FK_COLUMN_CANNOT_CHANGE
+ eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s'"
+
+ER_FK_COLUMN_CANNOT_CHANGE_CHILD
+ eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s' of table '%-.192s'"
+
+ER_FK_CANNOT_DELETE_PARENT
+ eng "Cannot delete rows from table which is parent in a foreign key constraint '%-.192s' of table '%-.192s'"
+
+ER_MALFORMED_PACKET
+ eng "Malformed communication packet."
+
+ER_READ_ONLY_MODE
+ eng "Running in read-only mode"
+
+ER_GTID_NEXT_TYPE_UNDEFINED_GROUP
+ eng "When GTID_NEXT is set to a GTID, you must explicitly set it again after a COMMIT or ROLLBACK. If you see this error message in the slave SQL thread, it means that a table in the current transaction is transactional on the master and non-transactional on the slave. In a client connection, it means that you executed SET GTID_NEXT before a transaction and forgot to set GTID_NEXT to a different identifier or to 'AUTOMATIC' after COMMIT or ROLLBACK. Current GTID_NEXT is '%s'."
+
+ER_VARIABLE_NOT_SETTABLE_IN_SP
+ eng "The system variable %.200s cannot be set in stored procedures."
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_MODE_IS_OFF
+ eng "GTID_PURGED can only be set when GTID_MODE = ON."
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_EXECUTED_IS_NOT_EMPTY
+ eng "GTID_PURGED can only be set when GTID_EXECUTED is empty."
+
+ER_CANT_SET_GTID_PURGED_WHEN_OWNED_GTIDS_IS_NOT_EMPTY
+ eng "GTID_PURGED can only be set when there are no ongoing transactions (not even in other clients)."
+
+ER_GTID_PURGED_WAS_CHANGED
+ eng "GTID_PURGED was changed from '%s' to '%s'."
+
+ER_GTID_EXECUTED_WAS_CHANGED
+ eng "GTID_EXECUTED was changed from '%s' to '%s'."
+
+ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES
+ eng "Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT, and both replicated and non replicated tables are written to."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED 0A000
+ eng "%s is not supported for this operation. Try %s."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON 0A000
+ eng "%s is not supported. Reason: %s. Try %s."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COPY
+ eng "COPY algorithm requires a lock"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_PARTITION
+ eng "Partition specific operations do not yet support LOCK/ALGORITHM"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME
+ eng "Columns participating in a foreign key are renamed"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE
+ eng "Cannot change column type INPLACE"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK
+ eng "Adding foreign keys needs foreign_key_checks=OFF"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE
+ eng "Creating unique indexes with IGNORE requires COPY algorithm to remove duplicate rows"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK
+ eng "Dropping a primary key is not allowed without also adding a new primary key"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC
+ eng "Adding an auto-increment column requires a lock"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS
+ eng "Cannot replace hidden FTS_DOC_ID with a user-visible one"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS
+ eng "Cannot drop or rename FTS_DOC_ID"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS
+ eng "Fulltext index creation requires a lock"
+
+ER_SQL_SLAVE_SKIP_COUNTER_NOT_SETTABLE_IN_GTID_MODE
+ eng "sql_slave_skip_counter can not be set when the server is running with GTID_MODE = ON. Instead, for each transaction that you want to skip, generate an empty transaction with the same GTID as the transaction"
+
+ER_DUP_UNKNOWN_IN_INDEX 23000
+ cze "Zdvojený klíč (číslo klíče '%-.192s')"
+ dan "Flere ens nøgler for indeks '%-.192s'"
+ nla "Dubbele ingang voor zoeksleutel '%-.192s'"
+ eng "Duplicate entry for key '%-.192s'"
+ est "Kattuv väärtus võtmele '%-.192s'"
+ fre "Duplicata du champ pour la clef '%-.192s'"
+ ger "Doppelter Eintrag für Schlüssel '%-.192s'"
+ greek "Διπλή εγγραφή για το κλειδί '%-.192s'"
+ hun "Duplikalt bejegyzes a '%-.192s' kulcs szerint."
+ ita "Valore duplicato per la chiave '%-.192s'"
+ jpn "は索引 '%-.192s' で重複しています。"
+ kor "중복된 입력 값: key '%-.192s'"
+ nor "Like verdier for nøkkel '%-.192s'"
+ norwegian-ny "Like verdiar for nykkel '%-.192s'"
+ pol "Powtórzone wystąpienie dla klucza '%-.192s'"
+ por "Entrada duplicada para a chave '%-.192s'"
+ rum "Cimpul e duplicat pentru cheia '%-.192s'"
+ rus "Дублирующаяся запись по ключу '%-.192s'"
+ serbian "Dupliran unos za ključ '%-.192s'"
+ slo "Opakovaný kľúč (číslo kľúča '%-.192s')"
+ spa "Entrada duplicada para la clave '%-.192s'"
+ swe "Dublett för nyckel '%-.192s'"
+ ukr "Дублюючий запис для ключа '%-.192s'"
+
+ER_IDENT_CAUSES_TOO_LONG_PATH
+ eng "Long database name and identifier for object resulted in path length exceeding %d characters. Path: '%s'."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL
+ eng "cannot silently convert NULL values, as required in this SQL_MODE"
+
#
# MariaDB error messages section starts here
#
@@ -6799,8 +7098,6 @@ ER_VIEW_ORDERBY_IGNORED
eng "View '%-.192s'.'%-.192s' ORDER BY clause ignored because there is other ORDER BY clause already."
ER_CONNECTION_KILLED 70100
eng "Connection was killed"
-ER_INTERNAL_ERROR
- eng "Internal error: '%-.192s'"
ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SKIP_REPLICATION
eng "Cannot modify @@session.skip_replication inside a transaction"
ER_STORED_FUNCTION_PREVENTS_SWITCH_SKIP_REPLICATION
@@ -6821,4 +7118,4 @@ ER_CANT_START_STOP_SLAVE
ER_SLAVE_STARTED
eng "SLAVE '%.*s' started"
ER_SLAVE_STOPPED
- eng "SLAVE '%.*s' stopped" \ No newline at end of file
+ eng "SLAVE '%.*s' stopped"
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index edc33c4d63b..9437db6c318 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -190,7 +190,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
"Some pointers may be invalid and cause the dump to abort.\n");
my_safe_printf_stderr("Query (%p): ", thd->query());
- my_safe_print_str(thd->query(), min(65536U, thd->query_length()));
+ my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length()));
my_safe_printf_stderr("\nConnection ID (thread ID): %lu\n",
(ulong) thd->thread_id);
my_safe_printf_stderr("Status: %s\n\n", kreason);
diff --git a/sql/slave.cc b/sql/slave.cc
index 78fa7998012..a79514a0d8b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -377,7 +377,7 @@ int init_recovery(Master_info* mi, const char** errmsg)
Relay_log_info *rli= &mi->rli;
if (rli->group_master_log_name[0])
{
- mi->master_log_pos= max(BIN_LOG_HEADER_SIZE,
+ mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE,
rli->group_master_log_pos);
strmake(mi->master_log_name, rli->group_master_log_name,
sizeof(mi->master_log_name)-1);
@@ -2259,13 +2259,13 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full)
slave is 2. At SHOW SLAVE STATUS time, assume that the difference
between timestamp of slave and rli->last_master_timestamp is 0
(i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
- This confuses users, so we don't go below 0: hence the max().
+ This confuses users, so we don't go below 0: hence the MY_MAX().
last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
special marker to say "consider we have caught up".
*/
protocol->store((longlong)(mi->rli.last_master_timestamp ?
- max(0, time_diff) : 0));
+ MY_MAX(0, time_diff) : 0));
}
else
{
@@ -3023,7 +3023,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
exec_res= 0;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
- slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
+ slave_sleep(thd, MY_MIN(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
sql_slave_killed, rli);
mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
rli->trans_retries++;
@@ -5096,7 +5096,7 @@ static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
relay_log_pos Current log pos
pending Number of bytes already processed from the event
*/
- rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
+ rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
my_b_seek(cur_log,rli->event_relay_log_pos);
DBUG_RETURN(cur_log);
}
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index 7cd2e789351..e2901125f14 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -2663,7 +2663,7 @@ sp_head::show_create_routine(THD *thd, int type)
Item_empty_string *stmt_fld=
new Item_empty_string(col3_caption,
- max(m_defstr.length, 1024));
+ MY_MAX(m_defstr.length, 1024));
stmt_fld->maybe_null= TRUE;
@@ -2863,7 +2863,7 @@ sp_head::show_routine_code(THD *thd)
field_list.push_back(new Item_uint("Pos", 9));
// 1024 is for not to confuse old clients
field_list.push_back(new Item_empty_string("Instruction",
- max(buffer.length(), 1024)));
+ MY_MAX(buffer.length(), 1024)));
if (protocol->send_result_set_metadata(&field_list, Protocol::SEND_NUM_ROWS |
Protocol::SEND_EOF))
DBUG_RETURN(1);
diff --git a/sql/spatial.h b/sql/spatial.h
index 1277e7bc01c..1db9b5767e5 100644
--- a/sql/spatial.h
+++ b/sql/spatial.h
@@ -195,8 +195,8 @@ struct MBR
if (d != mbr->dimension() || d <= 0 || contains(mbr) || within(mbr))
return 0;
- MBR intersection(max(xmin, mbr->xmin), max(ymin, mbr->ymin),
- min(xmax, mbr->xmax), min(ymax, mbr->ymax));
+ MBR intersection(MY_MAX(xmin, mbr->xmin), MY_MAX(ymin, mbr->ymin),
+ MY_MIN(xmax, mbr->xmax), MY_MIN(ymax, mbr->ymax));
return (d == intersection.dimension());
}
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index cb7e35fae09..9f085133059 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -1328,7 +1328,7 @@ static ulong get_sort(uint count,...)
chars= 128; // Marker that chars existed
}
}
- sort= (sort << 8) + (wild_pos ? min(wild_pos, 127U) : chars);
+ sort= (sort << 8) + (wild_pos ? MY_MIN(wild_pos, 127U) : chars);
}
va_end(args);
return sort;
diff --git a/sql/sql_alter.h b/sql/sql_alter.h
index 6660748f666..f0c0a873a5c 100644
--- a/sql/sql_alter.h
+++ b/sql/sql_alter.h
@@ -16,51 +16,412 @@
#ifndef SQL_ALTER_TABLE_H
#define SQL_ALTER_TABLE_H
+class Alter_drop;
+class Alter_column;
+class Key;
+
+/**
+ Data describing the table being created by CREATE TABLE or
+ altered by ALTER TABLE.
+*/
+
+class Alter_info
+{
+public:
+ /*
+ These flags are set by the parser and describes the type of
+ operation(s) specified by the ALTER TABLE statement.
+
+ They do *not* describe the type operation(s) to be executed
+ by the storage engine. For example, we don't yet know the
+ type of index to be added/dropped.
+ */
+
+ // Set for ADD [COLUMN]
+ static const uint ALTER_ADD_COLUMN = 1L << 0;
+
+ // Set for DROP [COLUMN]
+ static const uint ALTER_DROP_COLUMN = 1L << 1;
+
+ // Set for CHANGE [COLUMN] | MODIFY [CHANGE]
+ // Set by mysql_recreate_table()
+ static const uint ALTER_CHANGE_COLUMN = 1L << 2;
+
+ // Set for ADD INDEX | ADD KEY | ADD PRIMARY KEY | ADD UNIQUE KEY |
+ // ADD UNIQUE INDEX | ALTER ADD [COLUMN]
+ static const uint ALTER_ADD_INDEX = 1L << 3;
+
+ // Set for DROP PRIMARY KEY | DROP FOREIGN KEY | DROP KEY | DROP INDEX
+ static const uint ALTER_DROP_INDEX = 1L << 4;
+
+ // Set for RENAME [TO]
+ static const uint ALTER_RENAME = 1L << 5;
+
+ // Set for ORDER BY
+ static const uint ALTER_ORDER = 1L << 6;
+
+ // Set for table_options
+ static const uint ALTER_OPTIONS = 1L << 7;
+
+ // Set for ALTER [COLUMN] ... SET DEFAULT ... | DROP DEFAULT
+ static const uint ALTER_CHANGE_COLUMN_DEFAULT = 1L << 8;
+
+ // Set for DISABLE KEYS | ENABLE KEYS
+ static const uint ALTER_KEYS_ONOFF = 1L << 9;
+
+ // Set for CONVERT TO CHARACTER SET
+ static const uint ALTER_CONVERT = 1L << 10;
+
+ // Set for FORCE
+ // Set by mysql_recreate_table()
+ static const uint ALTER_RECREATE = 1L << 11;
+
+ // Set for ADD PARTITION
+ static const uint ALTER_ADD_PARTITION = 1L << 12;
+
+ // Set for DROP PARTITION
+ static const uint ALTER_DROP_PARTITION = 1L << 13;
+
+ // Set for COALESCE PARTITION
+ static const uint ALTER_COALESCE_PARTITION = 1L << 14;
+
+ // Set for REORGANIZE PARTITION ... INTO
+ static const uint ALTER_REORGANIZE_PARTITION = 1L << 15;
+
+ // Set for partition_options
+ static const uint ALTER_PARTITION = 1L << 16;
+
+ // Set for LOAD INDEX INTO CACHE ... PARTITION
+ // Set for CACHE INDEX ... PARTITION
+ static const uint ALTER_ADMIN_PARTITION = 1L << 17;
+
+ // Set for REORGANIZE PARTITION
+ static const uint ALTER_TABLE_REORG = 1L << 18;
+
+ // Set for REBUILD PARTITION
+ static const uint ALTER_REBUILD_PARTITION = 1L << 19;
+
+ // Set for partitioning operations specifying ALL keyword
+ static const uint ALTER_ALL_PARTITION = 1L << 20;
+
+ // Set for REMOVE PARTITIONING
+ static const uint ALTER_REMOVE_PARTITIONING = 1L << 21;
+
+ // Set for ADD FOREIGN KEY
+ static const uint ADD_FOREIGN_KEY = 1L << 22;
+
+ // Set for DROP FOREIGN KEY
+ static const uint DROP_FOREIGN_KEY = 1L << 23;
+
+ // Set for EXCHANGE PARITION
+ static const uint ALTER_EXCHANGE_PARTITION = 1L << 24;
+
+ // Set by Sql_cmd_alter_table_truncate_partition::execute()
+ static const uint ALTER_TRUNCATE_PARTITION = 1L << 25;
+
+ // Set for ADD [COLUMN] FIRST | AFTER
+ static const uint ALTER_COLUMN_ORDER = 1L << 26;
+
+
+ enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE };
+
+ /**
+ The different values of the ALGORITHM clause.
+ Describes which algorithm to use when altering the table.
+ */
+ enum enum_alter_table_algorithm
+ {
+ // In-place if supported, copy otherwise.
+ ALTER_TABLE_ALGORITHM_DEFAULT,
+
+ // In-place if supported, error otherwise.
+ ALTER_TABLE_ALGORITHM_INPLACE,
+
+ // Copy if supported, error otherwise.
+ ALTER_TABLE_ALGORITHM_COPY
+ };
+
+
+ /**
+ The different values of the LOCK clause.
+ Describes the level of concurrency during ALTER TABLE.
+ */
+ enum enum_alter_table_lock
+ {
+ // Maximum supported level of concurency for the given operation.
+ ALTER_TABLE_LOCK_DEFAULT,
+
+ // Allow concurrent reads & writes. If not supported, give erorr.
+ ALTER_TABLE_LOCK_NONE,
+
+ // Allow concurrent reads only. If not supported, give error.
+ ALTER_TABLE_LOCK_SHARED,
+
+ // Block reads and writes.
+ ALTER_TABLE_LOCK_EXCLUSIVE
+ };
+
+
+ // Columns and keys to be dropped.
+ List<Alter_drop> drop_list;
+ // Columns for ALTER_COLUMN_CHANGE_DEFAULT.
+ List<Alter_column> alter_list;
+ // List of keys, used by both CREATE and ALTER TABLE.
+ List<Key> key_list;
+ // List of columns, used by both CREATE and ALTER TABLE.
+ List<Create_field> create_list;
+ // Type of ALTER TABLE operation.
+ uint flags;
+ // Enable or disable keys.
+ enum_enable_or_disable keys_onoff;
+ // List of partitions.
+ List<char> partition_names;
+ // Number of partitions.
+ uint num_parts;
+ // Type of ALTER TABLE algorithm.
+ enum_alter_table_algorithm requested_algorithm;
+ // Type of ALTER TABLE lock.
+ enum_alter_table_lock requested_lock;
+
+
+ Alter_info() :
+ flags(0),
+ keys_onoff(LEAVE_AS_IS),
+ num_parts(0),
+ requested_algorithm(ALTER_TABLE_ALGORITHM_DEFAULT),
+ requested_lock(ALTER_TABLE_LOCK_DEFAULT)
+ {}
+
+ void reset()
+ {
+ drop_list.empty();
+ alter_list.empty();
+ key_list.empty();
+ create_list.empty();
+ flags= 0;
+ keys_onoff= LEAVE_AS_IS;
+ num_parts= 0;
+ partition_names.empty();
+ requested_algorithm= ALTER_TABLE_ALGORITHM_DEFAULT;
+ requested_lock= ALTER_TABLE_LOCK_DEFAULT;
+ }
+
+
+ /**
+ Construct a copy of this object to be used for mysql_alter_table
+ and mysql_create_table.
+
+ Historically, these two functions modify their Alter_info
+ arguments. This behaviour breaks re-execution of prepared
+ statements and stored procedures and is compensated by always
+ supplying a copy of Alter_info to these functions.
+
+ @param rhs Alter_info to make copy of
+ @param mem_root Mem_root for new Alter_info
+
+ @note You need to use check the error in THD for out
+ of memory condition after calling this function.
+ */
+ Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root);
+
+
+ /**
+ Parses the given string and sets requested_algorithm
+ if the string value matches a supported value.
+ Supported values: INPLACE, COPY, DEFAULT
+
+ @param str String containing the supplied value
+ @retval false Supported value found, state updated
+ @retval true Not supported value, no changes made
+ */
+ bool set_requested_algorithm(const LEX_STRING *str);
+
+
+ /**
+ Parses the given string and sets requested_lock
+ if the string value matches a supported value.
+ Supported values: NONE, SHARED, EXCLUSIVE, DEFAULT
+
+ @param str String containing the supplied value
+ @retval false Supported value found, state updated
+ @retval true Not supported value, no changes made
+ */
+
+ bool set_requested_lock(const LEX_STRING *str);
+
+private:
+ Alter_info &operator=(const Alter_info &rhs); // not implemented
+ Alter_info(const Alter_info &rhs); // not implemented
+};
+
+
+/** Runtime context for ALTER TABLE. */
+class Alter_table_ctx
+{
+public:
+ Alter_table_ctx();
+
+ Alter_table_ctx(THD *thd, TABLE_LIST *table_list, uint tables_opened_arg,
+ char *new_db_arg, char *new_name_arg);
+
+ /**
+ @return true if the table is moved to another database, false otherwise.
+ */
+ bool is_database_changed() const
+ { return (new_db != db); };
+
+ /**
+ @return true if the table is renamed, false otherwise.
+ */
+ bool is_table_renamed() const
+ { return (is_database_changed() || new_name != table_name); };
+
+ /**
+ @return filename (including .frm) for the new table.
+ */
+ const char *get_new_filename() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return new_filename;
+ }
+
+ /**
+ @return path to the original table.
+ */
+ const char *get_path() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return path;
+ }
+
+ /**
+ @return path to the new table.
+ */
+ const char *get_new_path() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return new_path;
+ }
+
+ /**
+ @return path to the temporary table created during ALTER TABLE.
+ */
+ const char *get_tmp_path() const
+ { return tmp_path; }
+
+ /**
+ Mark ALTER TABLE as needing to produce foreign key error if
+ it deletes a row from the table being changed.
+ */
+ void set_fk_error_if_delete_row(FOREIGN_KEY_INFO *fk)
+ {
+ fk_error_if_delete_row= true;
+ fk_error_id= fk->foreign_id->str;
+ fk_error_table= fk->foreign_table->str;
+ }
+
+public:
+ Create_field *datetime_field;
+ bool error_if_not_empty;
+ uint tables_opened;
+ char *db;
+ char *table_name;
+ char *alias;
+ char *new_db;
+ char *new_name;
+ char *new_alias;
+ char tmp_name[80];
+ /**
+ Indicates that if a row is deleted during copying of data from old version
+ of table to the new version ER_FK_CANNOT_DELETE_PARENT error should be
+ emitted.
+ */
+ bool fk_error_if_delete_row;
+ /** Name of foreign key for the above error. */
+ const char *fk_error_id;
+ /** Name of table for the above error. */
+ const char *fk_error_table;
+
+private:
+ char new_filename[FN_REFLEN + 1];
+ char new_alias_buff[FN_REFLEN + 1];
+ char path[FN_REFLEN + 1];
+ char new_path[FN_REFLEN + 1];
+ char tmp_path[FN_REFLEN + 1];
+
+#ifndef DBUG_OFF
+ /** Indicates that we are altering temporary table. Used only in asserts. */
+ bool tmp_table;
+#endif
+
+ Alter_table_ctx &operator=(const Alter_table_ctx &rhs); // not implemented
+ Alter_table_ctx(const Alter_table_ctx &rhs); // not implemented
+};
+
+
/**
- Alter_table_common represents the common properties of the ALTER TABLE
+ Sql_cmd_common_alter_table represents the common properties of the ALTER TABLE
statements.
@todo move Alter_info and other ALTER generic structures from Lex here.
*/
-class Alter_table_common : public Sql_statement
+class Sql_cmd_common_alter_table : public Sql_cmd
{
protected:
/**
Constructor.
- @param lex the LEX structure for this statement.
*/
- Alter_table_common(LEX *lex)
- : Sql_statement(lex)
+ Sql_cmd_common_alter_table()
{}
- virtual ~Alter_table_common()
+ virtual ~Sql_cmd_common_alter_table()
{}
+ virtual enum_sql_command sql_command_code() const
+ {
+ return SQLCOM_ALTER_TABLE;
+ }
};
/**
- Alter_table_statement represents the generic ALTER TABLE statement.
+ Sql_cmd_alter_table represents the generic ALTER TABLE statement.
@todo move Alter_info and other ALTER specific structures from Lex here.
*/
-class Alter_table_statement : public Alter_table_common
+class Sql_cmd_alter_table : public Sql_cmd_common_alter_table
{
public:
/**
Constructor, used to represent a ALTER TABLE statement.
- @param lex the LEX structure for this statement.
*/
- Alter_table_statement(LEX *lex)
- : Alter_table_common(lex)
+ Sql_cmd_alter_table()
{}
- ~Alter_table_statement()
+ ~Sql_cmd_alter_table()
{}
- /**
- Execute a ALTER TABLE statement at runtime.
- @param thd the current thread.
- @return false on success.
- */
bool execute(THD *thd);
};
+
+/**
+ Sql_cmd_alter_table_tablespace represents ALTER TABLE
+ IMPORT/DISCARD TABLESPACE statements.
+*/
+class Sql_cmd_discard_import_tablespace : public Sql_cmd_common_alter_table
+{
+public:
+ enum enum_tablespace_op_type
+ {
+ DISCARD_TABLESPACE, IMPORT_TABLESPACE
+ };
+
+ Sql_cmd_discard_import_tablespace(enum_tablespace_op_type tablespace_op_arg)
+ : m_tablespace_op(tablespace_op_arg)
+ {}
+
+ bool execute(THD *thd);
+
+private:
+ const enum_tablespace_op_type m_tablespace_op;
+};
+
#endif
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
index 31e13882515..6a590c91e5e 100644
--- a/sql/sql_analyse.cc
+++ b/sql/sql_analyse.cc
@@ -282,16 +282,16 @@ bool get_ev_num_info(EV_NUM_INFO *ev_info, NUM_INFO *info, const char *num)
{
if (((longlong) info->ullval) < 0)
return 0; // Impossible to store as a negative number
- ev_info->llval = -(longlong) max((ulonglong) -ev_info->llval,
+ ev_info->llval = -(longlong) MY_MAX((ulonglong) -ev_info->llval,
info->ullval);
- ev_info->min_dval = (double) -max(-ev_info->min_dval, info->dval);
+ ev_info->min_dval = (double) -MY_MAX(-ev_info->min_dval, info->dval);
}
else // ulonglong is as big as bigint in MySQL
{
if ((check_ulonglong(num, info->integers) == DECIMAL_NUM))
return 0;
- ev_info->ullval = (ulonglong) max(ev_info->ullval, info->ullval);
- ev_info->max_dval = (double) max(ev_info->max_dval, info->dval);
+ ev_info->ullval = (ulonglong) MY_MAX(ev_info->ullval, info->ullval);
+ ev_info->max_dval = (double) MY_MAX(ev_info->max_dval, info->dval);
}
return 1;
} // get_ev_num_info
@@ -1040,7 +1040,7 @@ String *field_decimal::avg(String *s, ha_rows rows)
my_decimal_div(E_DEC_FATAL_ERROR, &avg_val, sum+cur_sum, &num, prec_increment);
/* TODO remove this after decimal_div returns proper frac */
my_decimal_round(E_DEC_FATAL_ERROR, &avg_val,
- min(sum[cur_sum].frac + prec_increment, DECIMAL_MAX_SCALE),
+ MY_MIN(sum[cur_sum].frac + prec_increment, DECIMAL_MAX_SCALE),
FALSE,&rounded_avg);
my_decimal2string(E_DEC_FATAL_ERROR, &rounded_avg, 0, 0, '0', s);
return s;
@@ -1065,7 +1065,7 @@ String *field_decimal::std(String *s, ha_rows rows)
my_decimal_div(E_DEC_FATAL_ERROR, &tmp, &sum2, &num, prec_increment);
my_decimal2double(E_DEC_FATAL_ERROR, &tmp, &std_sqr);
s->set_real(((double) std_sqr <= 0.0 ? 0.0 : sqrt(std_sqr)),
- min(item->decimals + prec_increment, NOT_FIXED_DEC), my_thd_charset);
+ MY_MIN(item->decimals + prec_increment, NOT_FIXED_DEC), my_thd_charset);
return s;
}
@@ -1182,7 +1182,7 @@ bool analyse::change_columns(List<Item> &field_list)
func_items[8] = new Item_proc_string("Std", 255);
func_items[8]->maybe_null = 1;
func_items[9] = new Item_proc_string("Optimal_fieldtype",
- max(64, output_str_length));
+ MY_MAX(64, output_str_length));
for (uint i = 0; i < array_elements(func_items); i++)
field_list.push_back(func_items[i]);
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index f5e1deab546..dca0da651f7 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -1221,7 +1221,7 @@ void Query_cache::end_of_result(THD *thd)
}
last_result_block= header->result()->prev;
allign_size= ALIGN_SIZE(last_result_block->used);
- len= max(query_cache.min_allocation_unit, allign_size);
+ len= MY_MAX(query_cache.min_allocation_unit, allign_size);
if (last_result_block->length >= query_cache.min_allocation_unit + len)
query_cache.split_block(last_result_block,len);
@@ -2875,7 +2875,7 @@ Query_cache::write_block_data(ulong data_len, uchar* data,
DBUG_ENTER("Query_cache::write_block_data");
DBUG_PRINT("qcache", ("data: %ld, header: %ld, all header: %ld",
data_len, header_len, all_headers_len));
- Query_cache_block *block= allocate_block(max(align_len,
+ Query_cache_block *block= allocate_block(MY_MAX(align_len,
min_allocation_unit),1, 0);
if (block != 0)
{
@@ -2930,7 +2930,7 @@ Query_cache::append_result_data(Query_cache_block **current_block,
ulong append_min = get_min_append_result_data_size();
if (last_block_free_space < data_len &&
append_next_free_block(last_block,
- max(tail, append_min)))
+ MY_MAX(tail, append_min)))
last_block_free_space = last_block->length - last_block->used;
// If no space in last block (even after join) allocate new block
if (last_block_free_space < data_len)
@@ -2958,7 +2958,7 @@ Query_cache::append_result_data(Query_cache_block **current_block,
// Now finally write data to the last block
if (success && last_block_free_space > 0)
{
- ulong to_copy = min(data_len,last_block_free_space);
+ ulong to_copy = MY_MIN(data_len,last_block_free_space);
DBUG_PRINT("qcache", ("use free space %lub at block 0x%lx to copy %lub",
last_block_free_space, (ulong)last_block, to_copy));
memcpy((uchar*) last_block + last_block->used, data, to_copy);
@@ -3046,8 +3046,8 @@ inline ulong Query_cache::get_min_first_result_data_size()
if (queries_in_cache < QUERY_CACHE_MIN_ESTIMATED_QUERIES_NUMBER)
return min_result_data_size;
ulong avg_result = (query_cache_size - free_memory) / queries_in_cache;
- avg_result = min(avg_result, query_cache_limit);
- return max(min_result_data_size, avg_result);
+ avg_result = MY_MIN(avg_result, query_cache_limit);
+ return MY_MAX(min_result_data_size, avg_result);
}
inline ulong Query_cache::get_min_append_result_data_size()
@@ -3079,7 +3079,7 @@ my_bool Query_cache::allocate_data_chain(Query_cache_block **result_block,
ulong len= data_len + all_headers_len;
ulong align_len= ALIGN_SIZE(len);
- if (!(new_block= allocate_block(max(min_size, align_len),
+ if (!(new_block= allocate_block(MY_MAX(min_size, align_len),
min_result_data_size == 0,
all_headers_len + min_result_data_size)))
{
@@ -3088,7 +3088,7 @@ my_bool Query_cache::allocate_data_chain(Query_cache_block **result_block,
}
new_block->n_tables = 0;
- new_block->used = min(len, new_block->length);
+ new_block->used = MY_MIN(len, new_block->length);
new_block->type = Query_cache_block::RES_INCOMPLETE;
new_block->next = new_block->prev = new_block;
Query_cache_result *header = new_block->result();
@@ -3494,7 +3494,7 @@ Query_cache::allocate_block(ulong len, my_bool not_less, ulong min)
DBUG_PRINT("qcache", ("len %lu, not less %d, min %lu",
len, not_less,min));
- if (len >= min(query_cache_size, query_cache_limit))
+ if (len >= MY_MIN(query_cache_size, query_cache_limit))
{
DBUG_PRINT("qcache", ("Query cache hase only %lu memory and limit %lu",
query_cache_size, query_cache_limit));
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index a068cdc8f88..3f95f13c464 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -361,18 +361,6 @@ void thd_lock_thread_count(THD *)
}
/**
- Lock connection data for the set of connections this connection
- belongs to
-
- @param thd THD object
-*/
-void thd_unlock_thread_count(THD *)
-{
- mysql_cond_broadcast(&COND_thread_count);
- mysql_mutex_unlock(&LOCK_thread_count);
-}
-
-/**
Close the socket used by this connection
@param thd THD object
@@ -604,7 +592,7 @@ void THD::enter_stage(const PSI_stage_info *new_stage,
proc_info= msg;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_state)(msg);
+ PSI_THREAD_CALL(set_thread_state)(msg);
MYSQL_SET_STAGE(m_current_stage_key, calling_file, calling_line);
#endif
}
@@ -782,7 +770,7 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length,
if (max_query_len < 1)
len= thd->query_length();
else
- len= min(thd->query_length(), max_query_len);
+ len= MY_MIN(thd->query_length(), max_query_len);
str.append('\n');
str.append(thd->query(), len);
}
@@ -797,7 +785,7 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length,
was reallocated to a larger buffer to be able to fit.
*/
DBUG_ASSERT(buffer != NULL);
- length= min(str.length(), length-1);
+ length= MY_MIN(str.length(), length-1);
memcpy(buffer, str.c_ptr_quick(), length);
/* Make sure that the new string is null terminated */
buffer[length]= '\0';
@@ -937,6 +925,7 @@ THD::THD()
mysys_var=0;
binlog_evt_union.do_union= FALSE;
enable_slow_log= 0;
+ durability_property= HA_REGULAR_DURABILITY;
#ifndef DBUG_OFF
dbug_sentry=THD_SENTRY_MAGIC;
@@ -1953,6 +1942,42 @@ void THD::cleanup_after_query()
table_map_for_update= 0;
m_binlog_invoker= FALSE;
+extern "C" enum durability_properties thd_get_durability_property(const MYSQL_THD thd)
+{
+ enum durability_properties ret= HA_REGULAR_DURABILITY;
+
+ if (thd != NULL)
+ ret= thd->durability_property;
+
+ return ret;
+}
+
+/** Get the auto_increment_offset auto_increment_increment.
+Needed by InnoDB.
+@param thd Thread object
+@param off auto_increment_offset
+@param inc auto_increment_increment */
+extern "C" void thd_get_autoinc(const MYSQL_THD thd, ulong* off, ulong* inc)
+{
+ *off = thd->variables.auto_increment_offset;
+ *inc = thd->variables.auto_increment_increment;
+}
+
+
+/**
+ Is strict sql_mode set.
+ Needed by InnoDB.
+ @param thd Thread object
+ @return True if sql_mode has strict mode (all or trans).
+ @retval true sql_mode has strict mode (all or trans).
+ @retval false sql_mode has not strict mode (all or trans).
+*/
+extern "C" bool thd_is_strict_mode(const MYSQL_THD thd)
+{
+ return thd->is_strict_mode();
+}
+
+
#ifndef EMBEDDED_LIBRARY
if (rli_slave)
rli_slave->cleanup_after_query();
@@ -2826,7 +2851,7 @@ int select_export::send_data(List<Item> &items)
else
{
if (fixed_row_size)
- used_length=min(res->length(),item->max_length);
+ used_length=MY_MIN(res->length(),item->max_length);
else
used_length=res->length();
if ((result_type == STRING_RESULT || is_unsafe_field_sep) &&
@@ -4000,7 +4025,7 @@ static void thd_send_progress(THD *thd)
ulonglong report_time= my_interval_timer();
if (report_time > thd->progress.next_report_time)
{
- uint seconds_to_next= max(thd->variables.progress_report_time,
+ uint seconds_to_next= MY_MAX(thd->variables.progress_report_time,
global_system_variables.progress_report_time);
if (seconds_to_next == 0) // Turned off
seconds_to_next= 1; // Check again after 1 second
@@ -4412,7 +4437,7 @@ void THD::inc_status_created_tmp_disk_tables()
{
status_var_increment(status_var.created_tmp_disk_tables_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_created_tmp_disk_tables)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_created_tmp_disk_tables)(m_statement_psi, 1);
#endif
}
@@ -4420,7 +4445,7 @@ void THD::inc_status_created_tmp_tables()
{
status_var_increment(status_var.created_tmp_tables_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_created_tmp_tables)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_created_tmp_tables)(m_statement_psi, 1);
#endif
}
@@ -4428,7 +4453,7 @@ void THD::inc_status_select_full_join()
{
status_var_increment(status_var.select_full_join_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_full_join)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_full_join)(m_statement_psi, 1);
#endif
}
@@ -4436,7 +4461,7 @@ void THD::inc_status_select_full_range_join()
{
status_var_increment(status_var.select_full_range_join_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_full_range_join)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_full_range_join)(m_statement_psi, 1);
#endif
}
@@ -4444,7 +4469,7 @@ void THD::inc_status_select_range()
{
status_var_increment(status_var.select_range_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_range)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_range)(m_statement_psi, 1);
#endif
}
@@ -4452,7 +4477,7 @@ void THD::inc_status_select_range_check()
{
status_var_increment(status_var.select_range_check_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_range_check)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_range_check)(m_statement_psi, 1);
#endif
}
@@ -4460,7 +4485,7 @@ void THD::inc_status_select_scan()
{
status_var_increment(status_var.select_scan_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_scan)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_scan)(m_statement_psi, 1);
#endif
}
@@ -4468,7 +4493,7 @@ void THD::inc_status_sort_merge_passes()
{
status_var_increment(status_var.filesort_merge_passes_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_merge_passes)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_merge_passes)(m_statement_psi, 1);
#endif
}
@@ -4476,7 +4501,7 @@ void THD::inc_status_sort_range()
{
status_var_increment(status_var.filesort_range_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_range)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_range)(m_statement_psi, 1);
#endif
}
@@ -4484,7 +4509,7 @@ void THD::inc_status_sort_rows(ha_rows count)
{
statistic_add(status_var.filesort_rows_, count, &LOCK_status);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_rows)(m_statement_psi, count);
+ PSI_STATEMENT_CALL(inc_statement_sort_rows)(m_statement_psi, count);
#endif
}
@@ -4492,7 +4517,7 @@ void THD::inc_status_sort_scan()
{
status_var_increment(status_var.filesort_scan_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_scan)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_scan)(m_statement_psi, 1);
#endif
}
@@ -4500,7 +4525,7 @@ void THD::set_status_no_index_used()
{
server_status|= SERVER_QUERY_NO_INDEX_USED;
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(set_statement_no_index_used)(m_statement_psi);
+ PSI_STATEMENT_CALL(set_statement_no_index_used)(m_statement_psi);
#endif
}
@@ -4508,7 +4533,7 @@ void THD::set_status_no_good_index_used()
{
server_status|= SERVER_QUERY_NO_GOOD_INDEX_USED;
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(set_statement_no_good_index_used)(m_statement_psi);
+ PSI_STATEMENT_CALL(set_statement_no_good_index_used)(m_statement_psi);
#endif
}
@@ -4516,7 +4541,7 @@ void THD::set_command(enum enum_server_command command)
{
m_command= command;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_command)(m_command);
+ PSI_STATEMENT_CALL(set_thread_command)(m_command);
#endif
}
@@ -4529,7 +4554,7 @@ void THD::set_query(const CSET_STRING &string_arg)
mysql_mutex_unlock(&LOCK_thd_data);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_info)(query(), query_length());
+ PSI_THREAD_CALL(set_thread_info)(query(), query_length());
#endif
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index fccca9e8cbf..ba9f75b5d84 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -232,11 +232,14 @@ public:
class Alter_drop :public Sql_alloc {
public:
- enum drop_type {KEY, COLUMN };
+ enum drop_type {KEY, COLUMN, FOREIGN_KEY };
const char *name;
enum drop_type type;
Alter_drop(enum drop_type par_type,const char *par_name)
- :name(par_name), type(par_type) {}
+ :name(par_name), type(par_type)
+ {
+ DBUG_ASSERT(par_name != NULL);
+ }
/**
Used to make a clone of this object for ALTER/CREATE TABLE
@sa comment for Key_part_spec::clone
@@ -309,17 +312,22 @@ public:
enum fk_option { FK_OPTION_UNDEF, FK_OPTION_RESTRICT, FK_OPTION_CASCADE,
FK_OPTION_SET_NULL, FK_OPTION_NO_ACTION, FK_OPTION_DEFAULT};
- Table_ident *ref_table;
+ LEX_STRING ref_db;
+ LEX_STRING ref_table;
List<Key_part_spec> ref_columns;
uint delete_opt, update_opt, match_opt;
Foreign_key(const LEX_STRING &name_arg, List<Key_part_spec> &cols,
- Table_ident *table, List<Key_part_spec> &ref_cols,
+ const LEX_STRING &ref_db_arg, const LEX_STRING &ref_table_arg,
+ List<Key_part_spec> &ref_cols,
uint delete_opt_arg, uint update_opt_arg, uint match_opt_arg)
:Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols, NULL),
- ref_table(table), ref_columns(ref_cols),
+ ref_db(ref_db_arg), ref_table(ref_table_arg), ref_columns(ref_cols),
delete_opt(delete_opt_arg), update_opt(update_opt_arg),
match_opt(match_opt_arg)
- {}
+ {
+ // We don't check for duplicate FKs.
+ key_create_info.check_for_duplicate_indexes= false;
+ }
Foreign_key(const Foreign_key &rhs, MEM_ROOT *mem_root);
/**
Used to make a clone of this object for ALTER/CREATE TABLE
@@ -327,8 +335,6 @@ public:
*/
virtual Key *clone(MEM_ROOT *mem_root) const
{ return new (mem_root) Foreign_key(*this, mem_root); }
- /* Used to validate foreign key options */
- bool validate(List<Create_field> &table_fields);
};
typedef struct st_mysql_lock
@@ -2289,6 +2295,12 @@ public:
MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
/*
+ Define durability properties that engines may check to
+ improve performance. Not yet used in MariaDB
+ */
+ enum durability_properties durability_property;
+
+ /*
If checking this in conjunction with a wait condition, please
include a check after enter_cond() if you want to avoid a race
condition. For details see the implementation of awake(),
@@ -2586,7 +2598,7 @@ public:
start_time= hrtime_to_my_time(hrtime);
start_time_sec_part= hrtime_sec_part(hrtime);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_start_time)(start_time);
+ PSI_THREAD_CALL(set_thread_start_time)(start_time);
#endif
}
inline void set_start_time()
@@ -2596,7 +2608,7 @@ public:
start_time= hrtime_to_my_time(user_time);
start_time_sec_part= hrtime_sec_part(user_time);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_start_time)(start_time);
+ PSI_THREAD_CALL(set_thread_start_time)(start_time);
#endif
}
else
@@ -3032,7 +3044,7 @@ public:
result= new_db && !db;
#ifdef HAVE_PSI_THREAD_INTERFACE
if (result)
- PSI_CALL(set_thread_db)(new_db, new_db_len);
+ PSI_THREAD_CALL(set_thread_db)(new_db, new_db_len);
#endif
return result;
}
@@ -3053,7 +3065,7 @@ public:
db= new_db;
db_length= new_db_len;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_db)(new_db, new_db_len);
+ PSI_THREAD_CALL(set_thread_db)(new_db, new_db_len);
#endif
}
/*
diff --git a/sql/sql_client.cc b/sql/sql_client.cc
index eb6c039c065..e7c555b5947 100644
--- a/sql/sql_client.cc
+++ b/sql/sql_client.cc
@@ -36,7 +36,7 @@ void my_net_local_init(NET *net)
(uint)global_system_variables.net_write_timeout);
net->retry_count= (uint) global_system_variables.net_retry_count;
- net->max_packet_size= max(global_system_variables.net_buffer_length,
+ net->max_packet_size= MY_MAX(global_system_variables.net_buffer_length,
global_system_variables.max_allowed_packet);
#endif
}
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 59aa51916fb..33b5394de9f 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -431,7 +431,7 @@ void init_user_stats(USER_STATS *user_stats,
DBUG_ENTER("init_user_stats");
DBUG_PRINT("enter", ("user: %s priv_user: %s", user, priv_user));
- user_length= min(user_length, sizeof(user_stats->user)-1);
+ user_length= MY_MIN(user_length, sizeof(user_stats->user)-1);
memcpy(user_stats->user, user, user_length);
user_stats->user[user_length]= 0;
user_stats->user_name_length= user_length;
@@ -934,7 +934,7 @@ static int check_connection(THD *thd)
if (thd->main_security_ctx.host)
{
if (thd->main_security_ctx.host != my_localhost)
- thd->main_security_ctx.host[min(strlen(thd->main_security_ctx.host),
+ thd->main_security_ctx.host[MY_MIN(strlen(thd->main_security_ctx.host),
HOSTNAME_LENGTH)]= 0;
thd->main_security_ctx.host_or_ip= thd->main_security_ctx.host;
}
diff --git a/sql/sql_const.h b/sql/sql_const.h
index c6aa52197d5..9849f10b6ac 100644
--- a/sql/sql_const.h
+++ b/sql/sql_const.h
@@ -67,7 +67,7 @@
#define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
RAND_TABLE_BIT)
#define MAX_FIELDS 4096 /* Limit in the .frm file */
-#define MAX_PARTITIONS 1024
+#define MAX_PARTITIONS 8192
#define MAX_SELECT_NESTING (sizeof(nesting_map)*8-1)
@@ -128,6 +128,13 @@
*/
#define TABLE_DEF_CACHE_MIN 400
+/**
+ Maximum number of connections default value.
+ 151 is larger than Apache's default max children,
+ to avoid "too many connections" error in a common setup.
+*/
+#define MAX_CONNECTIONS_DEFAULT 151
+
/*
Stack reservation.
Feel free to raise this by the smallest amount you can to get the
@@ -232,7 +239,7 @@
#define DELAYED_LIMIT 100 /**< pause after xxx inserts */
#define DELAYED_QUEUE_SIZE 1000
#define DELAYED_WAIT_TIMEOUT 5*60 /**< Wait for delayed insert */
-#define MAX_CONNECT_ERRORS 10 ///< errors before disabling host
+#define MAX_CONNECT_ERRORS 100 ///< errors before disabling host
#define LONG_TIMEOUT ((ulong) 3600L*24L*365L)
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index 23a60267737..038f4876b14 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -834,7 +834,7 @@ uint32 convert_error_message(char *to, uint32 to_length, CHARSET_INFO *to_cs,
if (!to_cs || from_cs == to_cs || to_cs == &my_charset_bin)
{
- length= min(to_length, from_length);
+ length= MY_MIN(to_length, from_length);
memmove(to, from, length);
to[length]= 0;
return length;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index fde9f70fa79..ad327c378b8 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -696,7 +696,7 @@ void JOIN_CACHE::set_constants()
pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
min_buff_size= 0;
min_records= 1;
- buff_size= max(join->thd->variables.join_buff_size,
+ buff_size= MY_MAX(join->thd->variables.join_buff_size,
get_min_join_buffer_size());
size_of_rec_ofs= offset_size(buff_size);
size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
@@ -2739,7 +2739,7 @@ int JOIN_CACHE_HASHED::init_hash_table()
key_entries= 0;
/* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
- uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());
+ uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
for (size_of_key_ofs= 2;
size_of_key_ofs <= max_size_of_key_ofs;
size_of_key_ofs+= 2)
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
index 6953f6881ee..1c56fc9b178 100644
--- a/sql/sql_join_cache.h
+++ b/sql/sql_join_cache.h
@@ -420,7 +420,7 @@ protected:
/* Shall calculate how much space is remaining in the join buffer */
virtual size_t rem_space()
{
- return max(buff_size-(end_pos-buff)-aux_buff_size,0);
+ return MY_MAX(buff_size-(end_pos-buff)-aux_buff_size,0);
}
/*
@@ -943,7 +943,7 @@ protected:
*/
size_t rem_space()
{
- return max(last_key_entry-end_pos-aux_buff_size,0);
+ return MY_MAX(last_key_entry-end_pos-aux_buff_size,0);
}
/*
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index cc117e18d1e..61230113506 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -26,6 +26,7 @@
#include "thr_lock.h" /* thr_lock_type, TL_UNLOCK */
#include "mem_root_array.h"
#include "sql_cmd.h"
+#include "sql_alter.h" // Alter_info
/* YACC and LEX Definitions */
@@ -991,110 +992,6 @@ inline bool st_select_lex_unit::is_union ()
first_select()->next_select()->linkage == UNION_TYPE;
}
-#define ALTER_ADD_COLUMN (1L << 0)
-#define ALTER_DROP_COLUMN (1L << 1)
-#define ALTER_CHANGE_COLUMN (1L << 2)
-#define ALTER_ADD_INDEX (1L << 3)
-#define ALTER_DROP_INDEX (1L << 4)
-#define ALTER_RENAME (1L << 5)
-#define ALTER_ORDER (1L << 6)
-#define ALTER_OPTIONS (1L << 7)
-#define ALTER_CHANGE_COLUMN_DEFAULT (1L << 8)
-#define ALTER_KEYS_ONOFF (1L << 9)
-#define ALTER_CONVERT (1L << 10)
-#define ALTER_RECREATE (1L << 11)
-#define ALTER_ADD_PARTITION (1L << 12)
-#define ALTER_DROP_PARTITION (1L << 13)
-#define ALTER_COALESCE_PARTITION (1L << 14)
-#define ALTER_REORGANIZE_PARTITION (1L << 15)
-#define ALTER_PARTITION (1L << 16)
-#define ALTER_ADMIN_PARTITION (1L << 17)
-#define ALTER_TABLE_REORG (1L << 18)
-#define ALTER_REBUILD_PARTITION (1L << 19)
-#define ALTER_ALL_PARTITION (1L << 20)
-#define ALTER_REMOVE_PARTITIONING (1L << 21)
-#define ALTER_FOREIGN_KEY (1L << 22)
-#define ALTER_TRUNCATE_PARTITION (1L << 23)
-
-enum enum_alter_table_change_level
-{
- ALTER_TABLE_METADATA_ONLY= 0,
- ALTER_TABLE_DATA_CHANGED= 1,
- ALTER_TABLE_INDEX_CHANGED= 2
-};
-
-
-/**
- Temporary hack to enable a class bound forward declaration
- of the enum_alter_table_change_level enumeration. To be
- removed once Alter_info is moved to the sql_alter.h
- header.
-*/
-class Alter_table_change_level
-{
-private:
- typedef enum enum_alter_table_change_level enum_type;
- enum_type value;
-public:
- void operator = (enum_type v) { value = v; }
- operator enum_type () { return value; }
-};
-
-
-/**
- @brief Parsing data for CREATE or ALTER TABLE.
-
- This structure contains a list of columns or indexes to be created,
- altered or dropped.
-*/
-
-class Alter_info
-{
-public:
- List<Alter_drop> drop_list;
- List<Alter_column> alter_list;
- List<Key> key_list;
- List<Create_field> create_list;
- uint flags;
- enum enum_enable_or_disable keys_onoff;
- enum tablespace_op_type tablespace_op;
- List<char> partition_names;
- uint num_parts;
- enum_alter_table_change_level change_level;
- Create_field *datetime_field;
- bool error_if_not_empty;
-
-
- Alter_info() :
- flags(0),
- keys_onoff(LEAVE_AS_IS),
- tablespace_op(NO_TABLESPACE_OP),
- num_parts(0),
- change_level(ALTER_TABLE_METADATA_ONLY),
- datetime_field(NULL),
- error_if_not_empty(FALSE)
- {}
-
- void reset()
- {
- drop_list.empty();
- alter_list.empty();
- key_list.empty();
- create_list.empty();
- flags= 0;
- keys_onoff= LEAVE_AS_IS;
- tablespace_op= NO_TABLESPACE_OP;
- num_parts= 0;
- partition_names.empty();
- change_level= ALTER_TABLE_METADATA_ONLY;
- datetime_field= 0;
- error_if_not_empty= FALSE;
- }
- Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root);
-private:
- Alter_info &operator=(const Alter_info &rhs); // not implemented
- Alter_info(const Alter_info &rhs); // not implemented
-};
struct st_sp_chistics
{
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 11e23b56f71..64505b82413 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -1364,7 +1364,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
/* Set of a stack for unget if long terminators */
- uint length= max(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
+ uint length= MY_MAX(cs->mbmaxlen, MY_MAX(field_term_length, line_term_length)) + 1;
set_if_bigger(length,line_start.length());
stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index f1362674d0c..5beba029b20 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -5739,7 +5739,7 @@ bool check_stack_overrun(THD *thd, long margin,
return 1;
}
#ifndef DBUG_OFF
- max_stack_used= max(max_stack_used, stack_used);
+ max_stack_used= MY_MAX(max_stack_used, stack_used);
#endif
return 0;
}
@@ -7217,7 +7217,7 @@ bool check_simple_select()
char command[80];
Lex_input_stream *lip= & thd->m_parser_state->m_lip;
strmake(command, lip->yylval->symbol.str,
- min(lip->yylval->symbol.length, sizeof(command)-1));
+ MY_MIN(lip->yylval->symbol.length, sizeof(command)-1));
my_error(ER_CANT_USE_OPTION_HERE, MYF(0), command);
return 1;
}
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index f042f028450..b8aa4c7e944 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -4839,7 +4839,7 @@ that are reorganised.
*/
start_part= 0;
end_part= new_total_partitions - (upper_2n + 1);
- end_part= max(lower_2n - 1, end_part);
+ end_part= MY_MAX(lower_2n - 1, end_part);
}
else if (new_total_partitions <= upper_2n)
{
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 8778713d7e7..a81c05c5513 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -695,7 +695,7 @@ static my_bool read_maria_plugin_info(struct st_plugin_dl *plugin_dl,
for (i=0;
(old= (struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
i++)
- memcpy(cur + i, old, min(sizeof(cur[i]), sizeof_st_plugin));
+ memcpy(cur + i, old, MY_MIN(sizeof(cur[i]), sizeof_st_plugin));
sym= cur;
plugin_dl->allocated= true;
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 025ff8820e6..002fdb40036 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -343,7 +343,7 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns)
int2store(buff+5, columns);
int2store(buff+7, stmt->param_count);
buff[9]= 0; // Guard against a 4.1 client
- tmp= min(stmt->thd->warning_info->statement_warn_count(), 65535);
+ tmp= MY_MIN(stmt->thd->warning_info->statement_warn_count(), 65535);
int2store(buff+10, tmp);
/*
diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc
index feb7810fa28..dc7aacb3d94 100644
--- a/sql/sql_profile.cc
+++ b/sql/sql_profile.cc
@@ -288,7 +288,7 @@ void QUERY_PROFILE::set_query_source(char *query_source_arg,
uint query_length_arg)
{
/* Truncate to avoid DoS attacks. */
- uint length= min(MAX_QUERY_LENGTH, query_length_arg);
+ uint length= MY_MIN(MAX_QUERY_LENGTH, query_length_arg);
DBUG_ASSERT(query_source == NULL); /* we don't leak memory */
if (query_source_arg != NULL)
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 917f4ea1a80..9e6cd55fe07 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1818,7 +1818,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
mi->heartbeat_period = lex_mi->heartbeat_period;
else
- mi->heartbeat_period= (float) min(SLAVE_MAX_HEARTBEAT_PERIOD,
+ mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD,
(slave_net_timeout/2.0));
mi->received_heartbeats= LL(0); // counter lives until master is CHANGEd
/*
@@ -1916,12 +1916,12 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
{
/*
Sometimes mi->rli.master_log_pos == 0 (it happens when the SQL thread is
- not initialized), so we use a max().
+ not initialized), so we use a MY_MAX().
What happens to mi->rli.master_log_pos during the initialization stages
of replication is not 100% clear, so we guard against problems using
- max().
+ MY_MAX().
*/
- mi->master_log_pos = max(BIN_LOG_HEADER_SIZE,
+ mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
mi->rli.group_master_log_pos);
strmake(mi->master_log_name, mi->rli.group_master_log_name,
sizeof(mi->master_log_name)-1);
@@ -2106,7 +2106,7 @@ bool mysql_show_binlog_events(THD* thd)
LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
SELECT_LEX_UNIT *unit= &thd->lex->unit;
ha_rows event_count, limit_start, limit_end;
- my_off_t pos = max(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
+ my_off_t pos = MY_MAX(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
char search_file_name[FN_REFLEN], *name;
const char *log_file_name = lex_mi->log_file_name;
mysql_mutex_t *log_lock = binary_log->get_log_lock();
@@ -2394,14 +2394,14 @@ int log_loaded_block(IO_CACHE* file)
DBUG_RETURN(0);
for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
- buffer += min(block_len, max_event_size),
- block_len -= min(block_len, max_event_size))
+ buffer += MY_MIN(block_len, max_event_size),
+ block_len -= MY_MIN(block_len, max_event_size))
{
lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
if (lf_info->wrote_create_file)
{
Append_block_log_event a(lf_info->thd, lf_info->thd->db, buffer,
- min(block_len, max_event_size),
+ MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&a))
DBUG_RETURN(1);
@@ -2410,7 +2410,7 @@ int log_loaded_block(IO_CACHE* file)
{
Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db,
buffer,
- min(block_len, max_event_size),
+ MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&b))
DBUG_RETURN(1);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 27b93cff189..e1b2beabd32 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1188,7 +1188,7 @@ JOIN::optimize_inner()
#endif
/*
- Try to optimize count(*), min() and max() to const fields if
+ Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
there is implicit grouping (aggregate functions but no
group_list). In this case, the result set shall only contain one
row.
@@ -3741,7 +3741,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
This is can't be to high as otherwise we are likely to use
table scan.
*/
- s->worst_seeks= min((double) s->found_records / 10,
+ s->worst_seeks= MY_MIN((double) s->found_records / 10,
(double) s->read_time*3);
if (s->worst_seeks < 2.0) // Fix for small tables
s->worst_seeks=2.0;
@@ -4910,7 +4910,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
- uint m= max(select_lex->max_equal_elems,1);
+ uint m= MY_MAX(select_lex->max_equal_elems,1);
/*
We use the same piece of memory to store both KEY_FIELD
@@ -4933,7 +4933,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
can be not more than select_lex->max_equal_elems such
substitutions.
*/
- sz= max(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
+ sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
(((thd->lex->current_select->cond_count+1)*2 +
thd->lex->current_select->between_count)*m+1);
if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
@@ -5117,7 +5117,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
TABLE *tmp_table=join->table[tablenr];
if (tmp_table) // already created
- keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
+ keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
}
}
/*
@@ -5591,7 +5591,7 @@ best_access_path(JOIN *join,
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
tmp= table->file->read_time(key, 1,
- (ha_rows) min(tmp,s->worst_seeks));
+ (ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
}
@@ -5755,7 +5755,7 @@ best_access_path(JOIN *join,
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
tmp= table->file->read_time(key, 1,
- (ha_rows) min(tmp,s->worst_seeks));
+ (ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
else
@@ -10667,7 +10667,7 @@ bool TABLE_REF::is_access_triggered()
a correlated subquery itself, but has subqueries, we can free it
fully and also free JOINs of all its subqueries. The exception
is a subquery in SELECT list, e.g: @n
- SELECT a, (select max(b) from t1) group by c @n
+ SELECT a, (select MY_MAX(b) from t1) group by c @n
This subquery will not be evaluated at first sweep and its value will
not be inserted into the temporary table. Instead, it's evaluated
when selecting from the temporary table. Therefore, it can't be freed
@@ -14837,7 +14837,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
share->max_rows= ~(ha_rows) 0;
else
share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
- min(thd->variables.tmp_table_size,
+ MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size) :
thd->variables.tmp_table_size) /
share->reclength);
@@ -15416,7 +15416,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
start_recinfo,
share->uniques, &uniquedef,
&create_info,
- HA_CREATE_TMP_TABLE)))
+ HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
@@ -15562,7 +15562,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
start_recinfo,
share->uniques, &uniquedef,
&create_info,
- HA_CREATE_TMP_TABLE)))
+ HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
@@ -19576,7 +19576,7 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
count++;
if (!sortorder)
sortorder= (SORT_FIELD*) sql_alloc(sizeof(SORT_FIELD) *
- (max(count, *length) + 1));
+ (MY_MAX(count, *length) + 1));
pos= sort= sortorder;
if (!pos)
@@ -23290,7 +23290,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
index entry.
*/
index_scan_time= select_limit/rec_per_key *
- min(rec_per_key, table->file->scan_time());
+ MY_MIN(rec_per_key, table->file->scan_time());
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
{
@@ -23301,7 +23301,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (table->quick_keys.is_set(nr))
quick_records= table->quick_rows[nr];
if (best_key < 0 ||
- (select_limit <= min(quick_records,best_records) ?
+ (select_limit <= MY_MIN(quick_records,best_records) ?
keyinfo->key_parts < best_key_parts :
quick_records < best_records) ||
(!is_best_covering && is_covering))
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 1f860fe23db..74a554d8c6c 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -1046,7 +1046,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
{
field_list.push_back(new Item_empty_string("View",NAME_CHAR_LEN));
field_list.push_back(new Item_empty_string("Create View",
- max(buffer.length(),1024)));
+ MY_MAX(buffer.length(),1024)));
field_list.push_back(new Item_empty_string("character_set_client",
MY_CS_NAME_SIZE));
field_list.push_back(new Item_empty_string("collation_connection",
@@ -1057,7 +1057,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
field_list.push_back(new Item_empty_string("Table",NAME_CHAR_LEN));
// 1024 is for not to confuse old clients
field_list.push_back(new Item_empty_string("Create Table",
- max(buffer.length(),1024)));
+ MY_MAX(buffer.length(),1024)));
}
if (protocol->send_result_set_metadata(&field_list,
@@ -2249,7 +2249,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
/* Lock THD mutex that protects its data when looking at it. */
if (tmp->query())
{
- uint length= min(max_query_length, tmp->query_length());
+ uint length= MY_MIN(max_query_length, tmp->query_length());
char *q= thd->strmake(tmp->query(),length);
/* Safety: in case strmake failed, we set length to 0. */
thd_info->query_string=
@@ -2262,7 +2262,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
*/
if (tmp->progress.max_counter)
{
- uint max_stage= max(tmp->progress.max_stage, 1);
+ uint max_stage= MY_MAX(tmp->progress.max_stage, 1);
thd_info->progress= (((tmp->progress.stage / (double) max_stage) +
((tmp->progress.counter /
(double) tmp->progress.max_counter) /
@@ -2574,7 +2574,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
if (tmp->query())
{
table->field[7]->store(tmp->query(),
- min(PROCESS_LIST_INFO_WIDTH,
+ MY_MIN(PROCESS_LIST_INFO_WIDTH,
tmp->query_length()), cs);
table->field[7]->set_notnull();
}
@@ -3003,7 +3003,7 @@ static int aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats)
{
DBUG_ENTER("aggregate_user_stats");
if (my_hash_init(agg_user_stats, system_charset_info,
- max(all_user_stats->records, 1),
+ MY_MAX(all_user_stats->records, 1),
0, 0, (my_hash_get_key)get_key_user_stats,
(my_hash_free_key)free_user_stats, 0))
{
@@ -4225,7 +4225,7 @@ uint get_table_open_method(TABLE_LIST *tables,
for (ptr=tables->table->field; (field= *ptr) ; ptr++)
{
star_table_open_method=
- min(star_table_open_method,
+ MY_MIN(star_table_open_method,
schema_table->fields_info[field_indx].open_method);
if (bitmap_is_set(tables->table->read_set, field->field_index))
{
@@ -9107,7 +9107,7 @@ static bool show_create_trigger_impl(THD *thd,
Item_empty_string *stmt_fld=
new Item_empty_string("SQL Original Statement",
- max(trg_sql_original_stmt.length, 1024));
+ MY_MAX(trg_sql_original_stmt.length, 1024));
stmt_fld->maybe_null= TRUE;
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 9d11677666f..8ccc8aff365 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -661,7 +661,7 @@ int String::reserve(uint32 space_needed, uint32 grow_by)
{
if (Alloced_length < str_length + space_needed)
{
- if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
+ if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
return TRUE;
}
return FALSE;
@@ -748,7 +748,7 @@ int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
int stringcmp(const String *s,const String *t)
{
- uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
+ uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
int cmp= memcmp(s->ptr(), t->ptr(), len);
return (cmp) ? cmp : (int) (s_len - t_len);
}
@@ -765,7 +765,7 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
}
if (to->realloc(from_length))
return from; // Actually an error
- if ((to->str_length=min(from->str_length,from_length)))
+ if ((to->str_length=MY_MIN(from->str_length,from_length)))
memcpy(to->Ptr,from->Ptr,to->str_length);
to->str_charset=from->str_charset;
return to;
@@ -776,67 +776,6 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
Help functions
****************************************************************************/
-
-
-/*
- Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
-*/
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length, CHARSET_INFO *from_cs,
- uint *errors)
-{
- /*
- If any of the character sets is not ASCII compatible,
- immediately switch to slow mb_wc->wc_mb method.
- */
- if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
- return copy_and_convert_extended(to, to_length, to_cs,
- from, from_length, from_cs, errors);
-
- uint32 length= min(to_length, from_length), length2= length;
-
-#if defined(__i386__)
- /*
- Special loop for i386, it allows to refer to a
- non-aligned memory block as UINT32, which makes
- it possible to copy four bytes at once. This
- gives about 10% performance improvement comparing
- to byte-by-byte loop.
- */
- for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
- {
- if ((*(uint32*)from) & 0x80808080)
- break;
- *((uint32*) to)= *((const uint32*) from);
- }
-#endif
-
- for (; ; *to++= *from++, length--)
- {
- if (!length)
- {
- *errors= 0;
- return length2;
- }
- if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
- {
- uint32 copied_length= length2 - length;
- to_length-= copied_length;
- from_length-= copied_length;
- return copied_length + copy_and_convert_extended(to, to_length,
- to_cs,
- from, from_length,
- from_cs,
- errors);
- }
- }
-
- DBUG_ASSERT(FALSE); // Should never get to here
- return 0; // Make compiler happy
-}
-
-
/**
Copy string with HEX-encoding of "bad" characters.
@@ -954,7 +893,7 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
if (to_cs == &my_charset_bin)
{
- res= min(min(nchars, to_length), from_length);
+ res= MY_MIN(MY_MIN(nchars, to_length), from_length);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= NULL;
@@ -1166,7 +1105,7 @@ uint convert_to_printable(char *to, size_t to_len,
char *t= to;
char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
const char *f= from;
- const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
+ const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);
char *dots= to; // last safe place to append '...'
if (!f || t == t_end)
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 58cda343dac..40096466a92 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -34,9 +34,13 @@ typedef struct st_mem_root MEM_ROOT;
int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
-uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs, uint *errors);
+inline uint32 copy_and_convert(char *to, uint32 to_length,
+ const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
+}
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 3c094e1740e..f9dea58bb32 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -3621,7 +3621,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
if ((length=column->length) > max_key_length ||
length > file->max_key_part_length())
{
- length=min(max_key_length, file->max_key_part_length());
+ length=MY_MIN(max_key_length, file->max_key_part_length());
if (key->type == Key::MULTIPLE)
{
/* not a critical problem */
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 56e7db96a1a..0fa18b46ff4 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -2173,7 +2173,7 @@ master_file_def:
from 0" (4 in fact), unspecified means "don't change the position
(keep the preceding value)").
*/
- Lex->mi.pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.pos);
+ Lex->mi.pos= MY_MAX(BIN_LOG_HEADER_SIZE, Lex->mi.pos);
}
| RELAY_LOG_FILE_SYM EQ TEXT_STRING_sys
{
@@ -2183,7 +2183,7 @@ master_file_def:
{
Lex->mi.relay_log_pos = $3;
/* Adjust if < BIN_LOG_HEADER_SIZE (same comment as Lex->mi.pos) */
- Lex->mi.relay_log_pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos);
+ Lex->mi.relay_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos);
}
;
diff --git a/sql/structs.h b/sql/structs.h
index a3a54c524e6..e5e65e01064 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -89,8 +89,8 @@ struct ha_index_option_struct;
typedef struct st_key {
uint key_length; /* Tot length of key */
ulong flags; /* dupp key and pack flags */
- uint key_parts; /* How many key_parts */
- uint usable_key_parts; /* Should normally be = key_parts */
+ uint user_defined_key_parts; /* How many key_parts */
+ uint usable_key_parts; /* Should normally be = user_defined_key_parts */
uint ext_key_parts; /* Number of key parts in extended key */
ulong ext_key_flags; /* Flags for extended key */
key_part_map ext_key_part_map; /* Bitmap of pk key parts in extension */
@@ -256,10 +256,10 @@ typedef struct user_conn {
typedef struct st_user_stats
{
- char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+ char user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
// Account name the user is mapped to when this is a user from mapped_user.
// Otherwise, the same value as user.
- char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+ char priv_user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
uint user_name_length;
uint total_connections;
uint concurrent_connections;
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index ed5e34463e9..5bdb0a4a235 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1254,8 +1254,9 @@ static bool fix_max_connections(sys_var *self, THD *thd, enum_var_type type)
// children, to avoid "too many connections" error in a common setup
static Sys_var_ulong Sys_max_connections(
"max_connections", "The number of simultaneous clients allowed",
- GLOBAL_VAR(max_connections), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(1, 100000), DEFAULT(151), BLOCK_SIZE(1), NO_MUTEX_GUARD,
+ PARSED_EARLY GLOBAL_VAR(max_connections), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1, 100000),
+ DEFAULT(MAX_CONNECTIONS_DEFAULT), BLOCK_SIZE(1), NO_MUTEX_GUARD,
NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_max_connections));
static Sys_var_ulong Sys_max_connect_errors(
@@ -2686,7 +2687,7 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type)
Don't close thread tables or release metadata locks: if we do so, we
risk releasing locks/closing tables of expressions used to assign
other variables, as in:
- set @var=my_stored_function1(), @@autocommit=1, @var2=(select max(a)
+ set @var=my_stored_function1(), @@autocommit=1, @var2=(select MY_MAX(a)
from my_table), ...
The locks will be released at statement end anyway, as SET
statement that assigns autocommit is marked to commit
diff --git a/sql/table.cc b/sql/table.cc
index c8dc2b4ed5a..d7fd370dde5 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -854,6 +854,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
share->table_charset= get_charset((((uint) head[41]) << 8) +
(uint) head[38],MYF(0));
share->null_field_first= 1;
+ share->stats_sample_pages= uint2korr(head+42);
+ share->stats_auto_recalc= static_cast<enum_stats_auto_recalc>(head[44]);
}
if (!share->table_charset)
{
@@ -883,12 +885,12 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
if (disk_buff[0] & 0x80)
{
share->keys= keys= (disk_buff[1] << 7) | (disk_buff[0] & 0x7f);
- share->key_parts= key_parts= uint2korr(disk_buff+2);
+ share->user_defined_key_parts= key_parts= uint2korr(disk_buff+2);
}
else
{
share->keys= keys= disk_buff[0];
- share->key_parts= key_parts= disk_buff[1];
+ share->user_defined_key_parts= key_parts= disk_buff[1];
}
share->keys_for_keyread.init(0);
share->keys_in_use.init(keys);
@@ -931,7 +933,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME;
keyinfo->key_length= (uint) uint2korr(strpos+2);
- keyinfo->key_parts= (uint) strpos[4];
+ keyinfo->user_defined_key_parts= (uint) strpos[4];
keyinfo->algorithm= (enum ha_key_alg) strpos[5];
keyinfo->block_size= uint2korr(strpos+6);
strpos+=8;
@@ -940,7 +942,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
keyinfo->flags= ((uint) strpos[0]) ^ HA_NOSAME;
keyinfo->key_length= (uint) uint2korr(strpos+1);
- keyinfo->key_parts= (uint) strpos[3];
+ keyinfo->user_defined_key_parts= (uint) strpos[3];
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
strpos+=4;
}
@@ -965,7 +967,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
first_key_parts= first_keyinfo.key_parts;
keyinfo->flags= first_keyinfo.flags;
keyinfo->key_length= first_keyinfo.key_length;
- keyinfo->key_parts= first_keyinfo.key_parts;
+ keyinfo->user_defined_key_parts= first_keyinfo.key_parts;
keyinfo->algorithm= first_keyinfo.algorithm;
if (new_frm_ver >= 3)
keyinfo->block_size= first_keyinfo.block_size;
@@ -973,7 +975,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->key_part= key_part;
keyinfo->rec_per_key= rec_per_key;
- for (j=keyinfo->key_parts ; j-- ; key_part++)
+ for (j=keyinfo->user_defined_key_parts ; j-- ; key_part++)
{
*rec_per_key++=0;
key_part->fieldnr= (uint16) (uint2korr(strpos) & FIELD_NR_MASK);
@@ -999,7 +1001,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
}
key_part->store_length=key_part->length;
}
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->ext_key_flags= keyinfo->flags;
keyinfo->ext_key_part_map= 0;
if (share->use_ext_keys && i)
@@ -1009,7 +1011,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
j < first_key_parts && keyinfo->ext_key_parts < MAX_REF_PARTS;
j++)
{
- uint key_parts= keyinfo->key_parts;
+ uint key_parts= keyinfo->user_defined_key_parts;
KEY_PART_INFO* curr_key_part= keyinfo->key_part;
KEY_PART_INFO* curr_key_part_end= curr_key_part+key_parts;
for ( ; curr_key_part < curr_key_part_end; curr_key_part++)
@@ -1689,7 +1691,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->name_length+1);
}
- if (ext_key_parts > share->key_parts && key)
+ if (ext_key_parts > share->user_defined_key_parts && key)
{
KEY_PART_INFO *new_key_part= (keyinfo-1)->key_part +
(keyinfo-1)->ext_key_parts;
@@ -1698,7 +1700,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
Do not extend the key that contains a component
defined over the beginning of a field.
*/
- for (i= 0; i < keyinfo->key_parts; i++)
+ for (i= 0; i < keyinfo->user_defined_key_parts; i++)
{
uint fieldnr= keyinfo->key_part[i].fieldnr;
if (share->field[fieldnr-1]->key_length() !=
@@ -1709,11 +1711,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
}
}
- if (add_first_key_parts < keyinfo->ext_key_parts-keyinfo->key_parts)
+ if (add_first_key_parts < keyinfo->ext_key_parts-keyinfo->user_defined_key_parts)
{
share->ext_key_parts-= keyinfo->ext_key_parts;
key_part_map ext_key_part_map= keyinfo->ext_key_part_map;
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->ext_key_flags= keyinfo->flags;
keyinfo->ext_key_part_map= 0;
for (i= 0; i < add_first_key_parts; i++)
@@ -1746,7 +1748,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
*/
primary_key=key;
key_part= keyinfo->key_part;
- for (i=0 ; i < keyinfo->key_parts ;i++)
+ for (i=0 ; i < keyinfo->user_defined_key_parts ;i++)
{
uint fieldnr= key_part[i].fieldnr;
if (!fieldnr ||
@@ -1762,7 +1764,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
key_part= keyinfo->key_part;
uint key_parts= share->use_ext_keys ? keyinfo->ext_key_parts :
- keyinfo->key_parts;
+ keyinfo->user_defined_key_parts;
for (i=0; i < key_parts; key_part++, i++)
{
Field *field;
@@ -1804,7 +1806,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
if (i == 0 && key != primary_key)
field->flags |= (((keyinfo->flags & HA_NOSAME) &&
- (keyinfo->key_parts == 1)) ?
+ (keyinfo->user_defined_key_parts == 1)) ?
UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG);
if (i == 0)
field->key_start.set_bit(key);
@@ -1815,7 +1817,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
share->keys_for_keyread.set_bit(key);
field->part_of_key.set_bit(key);
- if (i < keyinfo->key_parts)
+ if (i < keyinfo->user_defined_key_parts)
field->part_of_key_not_clustered.set_bit(key);
}
if (handler_file->index_flags(key, i, 1) & HA_READ_ORDER)
@@ -1889,7 +1891,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->usable_key_parts= usable_parts; // Filesort
set_if_bigger(share->max_key_length,keyinfo->key_length+
- keyinfo->key_parts);
+ keyinfo->user_defined_key_parts);
share->total_key_length+= keyinfo->key_length;
/*
MERGE tables do not have unique indexes. But every key could be
@@ -2450,7 +2452,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
outparam->field[(uint) (share->found_next_number_field - share->field)];
/* Fix key->name and key_part->field */
- if (share->key_parts)
+ if (share->user_defined_key_parts)
{
KEY *key_info, *key_info_end;
KEY_PART_INFO *key_part;
@@ -2475,7 +2477,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
key_info->key_part= key_part;
key_part_end= key_part + (share->use_ext_keys ? key_info->ext_key_parts :
- key_info->key_parts) ;
+ key_info->user_defined_key_parts) ;
for ( ; key_part < key_part_end; key_part++)
{
Field *field= key_part->field= outparam->field[key_part->fieldnr - 1];
@@ -2493,7 +2495,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
}
}
if (!share->use_ext_keys)
- key_part+= key_info->ext_key_parts - key_info->key_parts;
+ key_part+= key_info->ext_key_parts - key_info->user_defined_key_parts;
}
}
@@ -3303,11 +3305,10 @@ File create_frm(THD *thd, const char *name, const char *db,
fileinfo[39]= (uchar) ((uint) create_info->transactional |
((uint) create_info->page_checksum << 2));
fileinfo[40]= (uchar) create_info->row_type;
- /* Next few bytes where for RAID support */
+ /* Bytes 41-46 were for RAID support; now reused for other purposes */
fileinfo[41]= (uchar) (csid >> 8);
- fileinfo[42]= 0;
- fileinfo[43]= 0;
- fileinfo[44]= 0;
+ int2store(fileinfo+42, create_info->stats_sample_pages & 0xffff);
+ fileinfo[44]= (uchar) create_info->stats_auto_recalc;
fileinfo[45]= 0;
fileinfo[46]= 0;
int4store(fileinfo+47, key_length);
@@ -6068,8 +6069,8 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
return TRUE;
keyinfo= key_info + key;
keyinfo->key_part= key_part_info;
- keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->key_length=0;
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
keyinfo->flags= HA_GENERATED_KEY;
@@ -6168,7 +6169,7 @@ bool TABLE::is_filled_at_execution()
uint TABLE::actual_n_key_parts(KEY *keyinfo)
{
return optimizer_flag(in_use, OPTIMIZER_SWITCH_EXTENDED_KEYS) ?
- keyinfo->ext_key_parts : keyinfo->key_parts;
+ keyinfo->ext_key_parts : keyinfo->user_defined_key_parts;
}
diff --git a/sql/table.h b/sql/table.h
index 1a567ae75d1..531b08bf813 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -646,7 +646,8 @@ struct TABLE_SHARE
key_map keys_for_keyread;
ha_rows min_rows, max_rows; /* create information */
ulong avg_row_length; /* create information */
- ulong version, mysql_version;
+ ulong version;
+ ulong mysql_version; /* 0 if .frm is created before 5.0 */
ulong reclength; /* Recordlength */
/* Stored record length. No generated-only virtual fields are included */
ulong stored_rec_length;
@@ -668,6 +669,9 @@ struct TABLE_SHARE
uint ref_count; /* How many TABLE objects uses this */
uint blob_ptr_size; /* 4 or 8 */
uint key_block_size; /* create key_block_size, if used */
+ uint stats_sample_pages; /* number of pages to sample during
+ stats estimation, if used, otherwise 0. */
+ enum_stats_auto_recalc stats_auto_recalc; /* Automatic recalc of stats. */
uint null_bytes, last_null_bit_pos;
/*
Same as null_bytes, except that if there is only a 'delete-marker' in
diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc
index 8c7db0673ac..548426587c5 100644
--- a/sql/thr_malloc.cc
+++ b/sql/thr_malloc.cc
@@ -134,7 +134,7 @@ char *sql_strmake_with_convert(const char *str, size_t arg_length,
if ((from_cs == &my_charset_bin) || (to_cs == &my_charset_bin))
{
// Safety if to_cs->mbmaxlen > 0
- new_length= min(arg_length, max_res_length);
+ new_length= MY_MIN(arg_length, max_res_length);
memcpy(pos, str, new_length);
}
else
diff --git a/sql/tztime.cc b/sql/tztime.cc
index b16cc65d6bb..665e624cd41 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -176,7 +176,7 @@ tz_load(const char *name, TIME_ZONE_INFO *sp, MEM_ROOT *storage)
uchar buf[sizeof(struct tzhead) + sizeof(my_time_t) * TZ_MAX_TIMES +
TZ_MAX_TIMES + sizeof(TRAN_TYPE_INFO) * TZ_MAX_TYPES +
#ifdef ABBR_ARE_USED
- max(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1))) +
+ MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1))) +
#endif
sizeof(LS_INFO) * TZ_MAX_LEAPS];
} u;
@@ -405,7 +405,7 @@ prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
Let us choose end_t as point before next time type change or leap
second correction.
*/
- end_t= min((next_trans_idx < sp->timecnt) ? sp->ats[next_trans_idx] - 1:
+ end_t= MY_MIN((next_trans_idx < sp->timecnt) ? sp->ats[next_trans_idx] - 1:
MY_TIME_T_MAX,
(next_leap_idx < sp->leapcnt) ?
sp->lsis[next_leap_idx].ls_trans - 1: MY_TIME_T_MAX);
@@ -1875,7 +1875,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
uchar types[TZ_MAX_TIMES];
TRAN_TYPE_INFO ttis[TZ_MAX_TYPES];
#ifdef ABBR_ARE_USED
- char chars[max(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1)))];
+ char chars[MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1)))];
#endif
/*
Used as a temporary tz_info until we decide that we actually want to
@@ -1942,7 +1942,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
field->store((longlong) tzid, TRUE);
DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
field->get_key_image(keybuff,
- min(field->key_length(), sizeof(keybuff)),
+ MY_MIN(field->key_length(), sizeof(keybuff)),
Field::itRAW);
if (table->file->ha_index_init(0, 1))
goto end;
@@ -1975,7 +1975,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
field->store((longlong) tzid, TRUE);
DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
field->get_key_image(keybuff,
- min(field->key_length(), sizeof(keybuff)),
+ MY_MIN(field->key_length(), sizeof(keybuff)),
Field::itRAW);
if (table->file->ha_index_init(0, 1))
goto end;
diff --git a/sql/unireg.cc b/sql/unireg.cc
index e40dc02c21b..4596bb52747 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -574,7 +574,7 @@ static uchar *pack_screens(List<Create_field> &create_fields,
}
cfield->row=(uint8) row;
cfield->col=(uint8) (length+1);
- cfield->sc_length=(uint8) min(cfield->length,cols-(length+2));
+ cfield->sc_length=(uint8) MY_MIN(cfield->length,cols-(length+2));
}
length=(uint) (pos-start_screen);
int2store(start_screen,length);
@@ -830,7 +830,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
DBUG_RETURN(1);
}
/* Hack to avoid bugs with small static rows in MySQL */
- reclength=max(file->min_record_length(table_options),reclength);
+ reclength=MY_MAX(file->min_record_length(table_options),reclength);
if (info_length+(ulong) create_fields.elements*FCOMP+288+
n_length+int_length+com_length+vcol_info_length > 65535L ||
int_count > 255)
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 342f8be956e..5a6251a5de6 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -699,7 +699,7 @@ int ha_archive::create(const char *name, TABLE *table_arg,
{
KEY *pos= table_arg->key_info+key;
KEY_PART_INFO *key_part= pos->key_part;
- KEY_PART_INFO *key_part_end= key_part + pos->key_parts;
+ KEY_PART_INFO *key_part_end= key_part + pos->user_defined_key_parts;
for (; key_part != key_part_end; key_part++)
{
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index 916c7b151de..c25bc4f2713 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -1308,7 +1308,7 @@ bool ha_tina::get_write_pos(my_off_t *end_pos, tina_set *closest_hole)
if (closest_hole == chain_ptr) /* no more chains */
*end_pos= file_buff->end();
else
- *end_pos= min(file_buff->end(), closest_hole->begin);
+ *end_pos= MY_MIN(file_buff->end(), closest_hole->begin);
return (closest_hole != chain_ptr) && (*end_pos == closest_hole->begin);
}
@@ -1545,7 +1545,7 @@ int ha_tina::repair(THD* thd, HA_CHECK_OPT* check_opt)
/* write repaired file */
while (1)
{
- write_end= min(file_buff->end(), current_position);
+ write_end= MY_MIN(file_buff->end(), current_position);
if ((write_end - write_begin) &&
(mysql_file_write(repair_file, (uchar*)file_buff->ptr(),
(size_t) (write_end - write_begin), MYF_RW)))
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 0c07af0a554..333e3b0b672 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -584,7 +584,7 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num)
size_t buf_len;
DBUG_ENTER("ha_federated parse_url_error");
- buf_len= min(table->s->connect_string.length,
+ buf_len= MY_MIN(table->s->connect_string.length,
FEDERATED_QUERY_BUFFER_SIZE-1);
strmake(buf, table->s->connect_string.str, buf_len);
my_error(error_num, MYF(0), buf);
@@ -1317,7 +1317,7 @@ bool ha_federated::create_where_from_key(String *to,
}
for (key_part= key_info->key_part,
- remainder= key_info->key_parts,
+ remainder= key_info->user_defined_key_parts,
length= ranges[i]->length,
ptr= ranges[i]->key; ;
remainder--,
@@ -1325,7 +1325,7 @@ bool ha_federated::create_where_from_key(String *to,
{
Field *field= key_part->field;
uint store_length= key_part->store_length;
- uint part_length= min(store_length, length);
+ uint part_length= MY_MIN(store_length, length);
needs_quotes= field->str_needs_quotes();
DBUG_DUMP("key, start of loop", ptr, length);
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index e1c2a38964a..f5cb284c7c4 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -522,7 +522,7 @@ static int parse_url_error(FEDERATEDX_SHARE *share, TABLE *table, int error_num)
int buf_len;
DBUG_ENTER("ha_federatedx parse_url_error");
- buf_len= min(table->s->connect_string.length,
+ buf_len= MY_MIN(table->s->connect_string.length,
FEDERATEDX_QUERY_BUFFER_SIZE-1);
strmake(buf, table->s->connect_string.str, buf_len);
my_error(error_num, MYF(0), buf);
@@ -1246,7 +1246,7 @@ bool ha_federatedx::create_where_from_key(String *to,
{
Field *field= key_part->field;
uint store_length= key_part->store_length;
- uint part_length= min(store_length, length);
+ uint part_length= MY_MIN(store_length, length);
needs_quotes= field->str_needs_quotes();
DBUG_DUMP("key, start of loop", ptr, length);
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 8e63799680b..66d64c54b89 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -221,14 +221,14 @@ void ha_heap::update_key_stats()
if (key->algorithm != HA_KEY_ALG_BTREE)
{
if (key->flags & HA_NOSAME)
- key->rec_per_key[key->key_parts-1]= 1;
+ key->rec_per_key[key->user_defined_key_parts-1]= 1;
else
{
ha_rows hash_buckets= file->s->keydef[i].hash_buckets;
uint no_records= hash_buckets ? (uint) (file->s->records/hash_buckets) : 2;
if (no_records < 2)
no_records= 2;
- key->rec_per_key[key->key_parts-1]= no_records;
+ key->rec_per_key[key->user_defined_key_parts-1]= no_records;
}
}
}
@@ -611,7 +611,7 @@ ha_rows ha_heap::records_in_range(uint inx, key_range *min_key,
/* Assert that info() did run. We need current statistics here. */
DBUG_ASSERT(key_stat_version == file->s->key_stat_version);
- return key->rec_per_key[key->key_parts-1];
+ return key->rec_per_key[key->user_defined_key_parts-1];
}
@@ -630,7 +630,7 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
bzero(hp_create_info, sizeof(*hp_create_info));
for (key= parts= 0; key < keys; key++)
- parts+= table_arg->key_info[key].key_parts;
+ parts+= table_arg->key_info[key].user_defined_key_parts;
if (!(keydef= (HP_KEYDEF*) my_malloc(keys * sizeof(HP_KEYDEF) +
parts * sizeof(HA_KEYSEG),
@@ -641,9 +641,9 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
{
KEY *pos= table_arg->key_info+key;
KEY_PART_INFO *key_part= pos->key_part;
- KEY_PART_INFO *key_part_end= key_part + pos->key_parts;
+ KEY_PART_INFO *key_part_end= key_part + pos->user_defined_key_parts;
- keydef[key].keysegs= (uint) pos->key_parts;
+ keydef[key].keysegs= (uint) pos->user_defined_key_parts;
keydef[key].flag= (pos->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL));
keydef[key].seg= seg;
diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c
index a8bc8e63810..e286ff69e61 100644
--- a/storage/heap/hp_create.c
+++ b/storage/heap/hp_create.c
@@ -254,18 +254,18 @@ static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
If not min_records and max_records are given, optimize for 1000 rows
*/
if (!min_records)
- min_records= min(1000, max_records);
+ min_records= MY_MIN(1000, max_records);
if (!max_records)
- max_records= max(min_records, 1000);
+ max_records= MY_MAX(min_records, 1000);
/*
We don't want too few records_in_block as otherwise the overhead of
of the HP_PTRS block will be too notable
*/
- records_in_block= max(1000, min_records);
- records_in_block= min(records_in_block, max_records);
+ records_in_block= MY_MAX(1000, min_records);
+ records_in_block= MY_MIN(records_in_block, max_records);
/* If big max_records is given, allocate bigger blocks */
- records_in_block= max(records_in_block, max_records / 10);
+ records_in_block= MY_MAX(records_in_block, max_records / 10);
/* We don't want too few blocks per row either */
if (records_in_block < 10)
records_in_block= 10;
diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c
index 058a2904697..13b49fbb7ec 100644
--- a/storage/heap/hp_test2.c
+++ b/storage/heap/hp_test2.c
@@ -132,7 +132,7 @@ int main(int argc, char *argv[])
for (i=0 ; i < recant ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*5,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*5,MAX_RECORDS));
make_record(record,n1,n2,n3,"Pos",write_count);
if (heap_write(file,record))
@@ -208,7 +208,7 @@ int main(int argc, char *argv[])
printf("- Update\n");
for (i=0 ; i < write_count/10 ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*2,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*2,MAX_RECORDS));
make_record(record2, n1, n2, n3, "XXX", update);
if (rnd(2) == 1)
{
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 318b45e43ae..ee8758a08d2 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -54,6 +54,8 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEB
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
#ENDIF()
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+
IF(NOT MSVC)
# either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
IF(NOT CMAKE_CROSSCOMPILING)
@@ -95,12 +97,36 @@ IF(NOT CMAKE_CROSSCOMPILING)
}"
HAVE_IB_GCC_ATOMIC_BUILTINS
)
+ CHECK_C_SOURCE_RUNS(
+ "#include<stdint.h>
+ int main()
+ {
+ int64_t x,y,res;
+
+ x = 10;
+ y = 123;
+ res = __sync_sub_and_fetch(&y, x);
+ if (res != y || y != 113) {
+ return(1);
+ }
+ res = __sync_add_and_fetch(&y, x);
+ if (res != y || y != 123) {
+ return(1);
+ }
+ return(0);
+ }"
+ HAVE_IB_GCC_ATOMIC_BUILTINS_64
+ )
ENDIF()
IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
ENDIF()
+IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64)
+ ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1)
+ENDIF()
+
# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
IF(NOT CMAKE_CROSSCOMPILING)
CHECK_C_SOURCE_RUNS(
@@ -129,7 +155,8 @@ ENDIF()
ENDIF(NOT MSVC)
-SET(LINKER_SCRIPT)
+CHECK_FUNCTION_EXISTS(asprintf HAVE_ASPRINTF)
+CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
# Solaris atomics
IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
@@ -150,10 +177,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
ENDIF()
- IF(CMAKE_COMPILER_IS_GNUCC AND NOT HAVE_VISIBILITY_HIDDEN)
- SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports")
- ENDIF()
-
IF(NOT CMAKE_CROSSCOMPILING)
# either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
CHECK_C_SOURCE_COMPILES(
@@ -233,13 +256,16 @@ ENDIF()
IF(MSVC)
# Avoid "unreferenced label" warning in generated file
GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH)
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.cc
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c
PROPERTIES COMPILE_FLAGS "/wd4102")
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.cc
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c
PROPERTIES COMPILE_FLAGS "/wd4003")
ENDIF()
-
+
+
SET(INNOBASE_SOURCES
+ api/api0api.cc
+ api/api0misc.cc
btr/btr0btr.cc
btr/btr0cur.cc
btr/btr0pcur.cc
@@ -260,6 +286,7 @@ SET(INNOBASE_SOURCES
dict/dict0load.cc
dict/dict0mem.cc
dict/dict0stats.cc
+ dict/dict0stats_bg.cc
dyn/dyn0dyn.cc
eval/eval0eval.cc
eval/eval0proc.cc
@@ -311,9 +338,11 @@ SET(INNOBASE_SOURCES
rem/rem0rec.cc
row/row0ext.cc
row/row0ftsort.cc
+ row/row0import.cc
row/row0ins.cc
row/row0merge.cc
row/row0mysql.cc
+ row/row0log.cc
row/row0purge.cc
row/row0row.cc
row/row0sel.cc
@@ -321,6 +350,7 @@ SET(INNOBASE_SOURCES
row/row0umod.cc
row/row0undo.cc
row/row0upd.cc
+ row/row0quiesce.cc
row/row0vers.cc
srv/srv0conc.cc
srv/srv0mon.cc
@@ -355,7 +385,18 @@ IF(WITH_INNODB)
SET(WITH_INNOBASE_STORAGE_ENGINE TRUE)
ENDIF()
+
+# On solaris, reduce symbol visibility, so loader does not mix
+# the same symbols from builtin innodb and from shared one.
+# Only required for old GCC (3.4.3) that does not support hidden visibility
+IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_COMPILER_IS_GNUCC
+ AND NOT HAVE_VISIBILITY_HIDDEN)
+ SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports")
+ELSE()
+ SET(LINKER_SCRIPT)
+ENDIF()
+
MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
DEFAULT
MODULE_OUTPUT_NAME ha_innodb
- LINK_LIBRARIES ${ZLIB_LIBRARY})
+ LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT})
diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc
new file mode 100644
index 00000000000..5f9762a1846
--- /dev/null
+++ b/storage/innobase/api/api0api.cc
@@ -0,0 +1,3859 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file api/api0api.cc
+InnoDB Native API
+
+2008-08-01 Created Sunny Bains
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#include "univ.i"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "api0api.h"
+#include "api0misc.h"
+#include "srv0start.h"
+#include "dict0dict.h"
+#include "btr0pcur.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0vers.h"
+#include "trx0roll.h"
+#include "dict0crea.h"
+#include "row0merge.h"
+#include "pars0pars.h"
+#include "lock0types.h"
+#include "row0sel.h"
+#include "lock0lock.h"
+#include "rem0cmp.h"
+#include "ut0dbg.h"
+#include "dict0priv.h"
+#include "ut0ut.h"
+#include "ha_prototypes.h"
+#include "trx0roll.h"
+
+/** configure variable for binlog option with InnoDB APIs */
+my_bool ib_binlog_enabled = FALSE;
+
+/** configure variable for MDL option with InnoDB APIs */
+my_bool ib_mdl_enabled = FALSE;
+
+/** configure variable for disable rowlock with InnoDB APIs */
+my_bool ib_disable_row_lock = FALSE;
+
+/** configure variable for Transaction isolation levels */
+ulong ib_trx_level_setting = IB_TRX_READ_UNCOMMITTED;
+
+/** configure variable for background commit interval in seconds */
+ulong ib_bk_commit_interval = 0;
+
+/** InnoDB tuple types. */
+enum ib_tuple_type_t{
+ TPL_TYPE_ROW, /*!< Data row tuple */
+ TPL_TYPE_KEY /*!< Index key tuple */
+};
+
+/** Query types supported. */
+enum ib_qry_type_t{
+ QRY_NON, /*!< None/Sentinel */
+ QRY_INS, /*!< Insert operation */
+ QRY_UPD, /*!< Update operation */
+ QRY_SEL /*!< Select operation */
+};
+
+/** Query graph types. */
+struct ib_qry_grph_t {
+ que_fork_t* ins; /*!< Innobase SQL query graph used
+ in inserts */
+ que_fork_t* upd; /*!< Innobase SQL query graph used
+ in updates or deletes */
+ que_fork_t* sel; /*!< dummy query graph used in
+ selects */
+};
+
+/** Query node types. */
+struct ib_qry_node_t {
+ ins_node_t* ins; /*!< Innobase SQL insert node
+ used to perform inserts to the table */
+ upd_node_t* upd; /*!< Innobase SQL update node
+ used to perform updates and deletes */
+ sel_node_t* sel; /*!< Innobase SQL select node
+ used to perform selects on the table */
+};
+
+/** Query processing fields. */
+struct ib_qry_proc_t {
+
+ ib_qry_node_t node; /*!< Query node*/
+
+ ib_qry_grph_t grph; /*!< Query graph */
+};
+
+/** Cursor instance for traversing tables/indexes. This will eventually
+become row_prebuilt_t. */
+struct ib_cursor_t {
+ mem_heap_t* heap; /*!< Instance heap */
+
+ mem_heap_t* query_heap; /*!< Heap to use for query graphs */
+
+ ib_qry_proc_t q_proc; /*!< Query processing info */
+
+ ib_match_mode_t match_mode; /*!< ib_cursor_moveto match mode */
+
+ row_prebuilt_t* prebuilt; /*!< For reading rows */
+
+ bool valid_trx; /*!< Valid transaction attached */
+};
+
+/** InnoDB table columns used during table and index schema creation. */
+struct ib_col_t {
+ const char* name; /*!< Name of column */
+
+ ib_col_type_t ib_col_type; /*!< Main type of the column */
+
+ ulint len; /*!< Length of the column */
+
+ ib_col_attr_t ib_col_attr; /*!< Column attributes */
+
+};
+
+/** InnoDB index columns used during index and index schema creation. */
+struct ib_key_col_t {
+ const char* name; /*!< Name of column */
+
+ ulint prefix_len; /*!< Column index prefix len or 0 */
+};
+
+struct ib_table_def_t;
+
+/** InnoDB index schema used during index creation */
+struct ib_index_def_t {
+ mem_heap_t* heap; /*!< Heap used to build this and all
+ its columns in the list */
+
+ const char* name; /*!< Index name */
+
+ dict_table_t* table; /*!< Parent InnoDB table */
+
+ ib_table_def_t* schema; /*!< Parent table schema that owns
+ this instance */
+
+ ibool clustered; /*!< True if clustered index */
+
+ ibool unique; /*!< True if unique index */
+
+ ib_vector_t* cols; /*!< Vector of columns */
+
+ trx_t* usr_trx; /*!< User transacton covering the
+ DDL operations */
+};
+
+/** InnoDB table schema used during table creation */
+struct ib_table_def_t {
+ mem_heap_t* heap; /*!< Heap used to build this and all
+ its columns in the list */
+ const char* name; /*!< Table name */
+
+ ib_tbl_fmt_t ib_tbl_fmt; /*!< Row format */
+
+ ulint page_size; /*!< Page size */
+
+ ib_vector_t* cols; /*!< Vector of columns */
+
+ ib_vector_t* indexes; /*!< Vector of indexes */
+
+ dict_table_t* table; /* Table read from or NULL */
+};
+
+/** InnoDB tuple used for key operations. */
+struct ib_tuple_t {
+ mem_heap_t* heap; /*!< Heap used to build
+ this and for copying
+ the column values. */
+
+ ib_tuple_type_t type; /*!< Tuple discriminitor. */
+
+ const dict_index_t* index; /*!< Index for tuple can be either
+ secondary or cluster index. */
+
+ dtuple_t* ptr; /*!< The internal tuple
+ instance */
+};
+
+/** The following counter is used to convey information to InnoDB
+about server activity: in selects it is not sensible to call
+srv_active_wake_master_thread after each fetch or search, we only do
+it every INNOBASE_WAKE_INTERVAL'th step. */
+
+#define INNOBASE_WAKE_INTERVAL 32
+
+/*****************************************************************//**
+Check whether the Innodb persistent cursor is positioned.
+@return IB_TRUE if positioned */
+UNIV_INLINE
+ib_bool_t
+ib_btr_cursor_is_positioned(
+/*========================*/
+ btr_pcur_t* pcur) /*!< in: InnoDB persistent cursor */
+{
+ return(pcur->old_stored == BTR_PCUR_OLD_STORED
+ && (pcur->pos_state == BTR_PCUR_IS_POSITIONED
+ || pcur->pos_state == BTR_PCUR_WAS_POSITIONED));
+}
+
+
+/********************************************************************//**
+Open a table using the table id, if found then increment table ref count.
+@return table instance if found */
+static
+dict_table_t*
+ib_open_table_by_id(
+/*================*/
+ ib_id_u64_t tid, /*!< in: table id to lookup */
+ ib_bool_t locked) /*!< in: TRUE if own dict mutex */
+{
+ dict_table_t* table;
+ table_id_t table_id;
+
+ table_id = tid;
+
+ if (!locked) {
+ dict_mutex_enter_for_mysql();
+ }
+
+ table = dict_table_open_on_id(table_id, FALSE, FALSE);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ if (!locked) {
+ dict_mutex_exit_for_mysql();
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+UNIV_INTERN
+void*
+ib_open_table_by_name(
+/*==================*/
+ const char* name) /*!< in: table name to lookup */
+{
+ dict_table_t* table;
+
+ table = dict_table_open_on_name(name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Find table using table name.
+@return table instance if found */
+static
+dict_table_t*
+ib_lookup_table_by_name(
+/*====================*/
+ const char* name) /*!< in: table name to lookup */
+{
+ dict_table_t* table;
+
+ table = dict_table_get_low(name);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
+time calls srv_active_wake_master_thread. This function should be used
+when a single database operation may introduce a small need for
+server utility activity, like checkpointing. */
+UNIV_INLINE
+void
+ib_wake_master_thread(void)
+/*=======================*/
+{
+ static ulint ib_signal_counter = 0;
+
+ ++ib_signal_counter;
+
+ if ((ib_signal_counter % INNOBASE_WAKE_INTERVAL) == 0) {
+ srv_active_wake_master_thread();
+ }
+}
+
+/*********************************************************************//**
+Calculate the max row size of the columns in a cluster index.
+@return max row length */
+UNIV_INLINE
+ulint
+ib_get_max_row_len(
+/*===============*/
+ dict_index_t* cluster) /*!< in: cluster index */
+{
+ ulint i;
+ ulint max_len = 0;
+ ulint n_fields = cluster->n_fields;
+
+ /* Add the size of the ordering columns in the
+ clustered index. */
+ for (i = 0; i < n_fields; ++i) {
+ const dict_col_t* col;
+
+ col = dict_index_get_nth_col(cluster, i);
+
+ /* Use the maximum output size of
+ mach_write_compressed(), although the encoded
+ length should always fit in 2 bytes. */
+ max_len += dict_col_get_max_size(col);
+ }
+
+ return(max_len);
+}
+
+/*****************************************************************//**
+Read the columns from a rec into a tuple. */
+static
+void
+ib_read_tuple(
+/*==========*/
+ const rec_t* rec, /*!< in: Record to read */
+ ib_bool_t page_format, /*!< in: IB_TRUE if compressed format */
+ ib_tuple_t* tuple) /*!< in: tuple to read into */
+{
+ ulint i;
+ void* ptr;
+ rec_t* copy;
+ ulint rec_meta_data;
+ ulint n_index_fields;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ dtuple_t* dtuple = tuple->ptr;
+ const dict_index_t* index = tuple->index;
+
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(
+ rec, index, offsets, ULINT_UNDEFINED, &tuple->heap);
+
+ rec_meta_data = rec_get_info_bits(rec, page_format);
+ dtuple_set_info_bits(dtuple, rec_meta_data);
+
+ /* Make a copy of the rec. */
+ ptr = mem_heap_alloc(tuple->heap, rec_offs_size(offsets));
+ copy = rec_copy(ptr, rec, offsets);
+
+ n_index_fields = ut_min(
+ rec_offs_n_fields(offsets), dtuple_get_n_fields(dtuple));
+
+ for (i = 0; i < n_index_fields; ++i) {
+ ulint len;
+ const byte* data;
+ dfield_t* dfield;
+
+ if (tuple->type == TPL_TYPE_ROW) {
+ const dict_col_t* col;
+ ulint col_no;
+ const dict_field_t* index_field;
+
+ index_field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(index_field);
+ col_no = dict_col_get_no(col);
+
+ dfield = dtuple_get_nth_field(dtuple, col_no);
+ } else {
+ dfield = dtuple_get_nth_field(dtuple, i);
+ }
+
+ data = rec_get_nth_field(copy, offsets, i, &len);
+
+ /* Fetch and copy any externally stored column. */
+ if (rec_offs_nth_extern(offsets, i)) {
+
+ ulint zip_size;
+
+ zip_size = dict_table_zip_size(index->table);
+
+ data = btr_rec_copy_externally_stored_field(
+ copy, offsets, zip_size, i, &len,
+ tuple->heap);
+
+ ut_a(len != UNIV_SQL_NULL);
+ }
+
+ dfield_set_data(dfield, data, len);
+ }
+}
+
+/*****************************************************************//**
+Create an InnoDB key tuple.
+@return tuple instance created, or NULL */
+static
+ib_tpl_t
+ib_key_tuple_new_low(
+/*=================*/
+ const dict_index_t* index, /*!< in: index for which tuple
+ required */
+ ulint n_cols, /*!< in: no. of user defined cols */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ib_tuple_t* tuple;
+ ulint i;
+ ulint n_cmp_cols;
+
+ tuple = static_cast<ib_tuple_t*>(
+ mem_heap_alloc(heap, sizeof(*tuple)));
+
+ if (tuple == NULL) {
+ mem_heap_free(heap);
+ return(NULL);
+ }
+
+ tuple->heap = heap;
+ tuple->index = index;
+ tuple->type = TPL_TYPE_KEY;
+
+ /* Is it a generated clustered index ? */
+ if (n_cols == 0) {
+ ++n_cols;
+ }
+
+ tuple->ptr = dtuple_create(heap, n_cols);
+
+ /* Copy types and set to SQL_NULL. */
+ dict_index_copy_types(tuple->ptr, index, n_cols);
+
+ for (i = 0; i < n_cols; i++) {
+
+ dfield_t* dfield;
+
+ dfield = dtuple_get_nth_field(tuple->ptr, i);
+ dfield_set_null(dfield);
+ }
+
+ n_cmp_cols = dict_index_get_n_ordering_defined_by_user(index);
+
+ dtuple_set_n_fields_cmp(tuple->ptr, n_cmp_cols);
+
+ return((ib_tpl_t) tuple);
+}
+
+/*****************************************************************//**
+Create an InnoDB key tuple.
+@return tuple instance created, or NULL */
+static
+ib_tpl_t
+ib_key_tuple_new(
+/*=============*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols) /*!< in: no. of user defined cols */
+{
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(64);
+
+ if (heap == NULL) {
+ return(NULL);
+ }
+
+ return(ib_key_tuple_new_low(index, n_cols, heap));
+}
+
+/*****************************************************************//**
+Create an InnoDB row tuple.
+@return tuple instance, or NULL */
+static
+ib_tpl_t
+ib_row_tuple_new_low(
+/*=================*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols, /*!< in: no. of cols in tuple */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ib_tuple_t* tuple;
+
+ tuple = static_cast<ib_tuple_t*>(mem_heap_alloc(heap, sizeof(*tuple)));
+
+ if (tuple == NULL) {
+ mem_heap_free(heap);
+ return(NULL);
+ }
+
+ tuple->heap = heap;
+ tuple->index = index;
+ tuple->type = TPL_TYPE_ROW;
+
+ tuple->ptr = dtuple_create(heap, n_cols);
+
+ /* Copy types and set to SQL_NULL. */
+ dict_table_copy_types(tuple->ptr, index->table);
+
+ return((ib_tpl_t) tuple);
+}
+
+/*****************************************************************//**
+Create an InnoDB row tuple.
+@return tuple instance, or NULL */
+static
+ib_tpl_t
+ib_row_tuple_new(
+/*=============*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols) /*!< in: no. of cols in tuple */
+{
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(64);
+
+ if (heap == NULL) {
+ return(NULL);
+ }
+
+ return(ib_row_tuple_new_low(index, n_cols, heap));
+}
+
+/*****************************************************************//**
+Begin a transaction.
+@return innobase txn handle */
+UNIV_INTERN
+ib_err_t
+ib_trx_start(
+/*=========*/
+ ib_trx_t ib_trx, /*!< in: transaction to restart */
+ ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
+ void* thd) /*!< in: THD */
+{
+ ib_err_t err = DB_SUCCESS;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ ut_a(ib_trx_level <= IB_TRX_SERIALIZABLE);
+
+ trx_start_if_not_started(trx);
+
+ trx->isolation_level = ib_trx_level;
+
+ /* FIXME: This is a place holder, we should add an arg that comes
+ from the client. */
+ trx->mysql_thd = static_cast<THD*>(thd);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle.
+put the transaction in the active state.
+@return innobase txn handle */
+UNIV_INTERN
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+ ib_trx_level_t ib_trx_level) /*!< in: trx isolation level */
+{
+ trx_t* trx;
+ ib_bool_t started;
+
+ trx = trx_allocate_for_mysql();
+ started = ib_trx_start((ib_trx_t) trx, ib_trx_level, NULL);
+ ut_a(started);
+
+ return((ib_trx_t) trx);
+}
+
+/*****************************************************************//**
+Get the transaction's state.
+@return transaction state */
+UNIV_INTERN
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+
+ return((ib_trx_state_t) trx->state);
+}
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+UNIV_INTERN
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+ ib_trx_t ib_trx) /*!< in: transaction */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+ return(static_cast<ib_u64_t>(trx->start_time));
+}
+/*****************************************************************//**
+Release the resources of the transaction.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_trx_release(
+/*===========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+
+ ut_ad(trx != NULL);
+ trx_free_for_mysql(trx);
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Commit a transaction. This function will also release the schema
+latches too.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ ib_err_t err = DB_SUCCESS;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ if (trx->state == TRX_STATE_NOT_STARTED) {
+ err = ib_trx_release(ib_trx);
+ return(err);
+ }
+
+ trx_commit(trx);
+
+ err = ib_trx_release(ib_trx);
+ ut_a(err == DB_SUCCESS);
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Rollback a transaction. This function will also release the schema
+latches too.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_trx_rollback(
+/*============*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ ib_err_t err;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ err = static_cast<ib_err_t>(trx_rollback_for_mysql(trx));
+
+ /* It should always succeed */
+ ut_a(err == DB_SUCCESS);
+
+ err = ib_trx_release(ib_trx);
+ ut_a(err == DB_SUCCESS);
+
+ ib_wake_master_thread();
+
+ return(err);
+}
+
+/*****************************************************************//**
+Find an index definition from the index vector using index name.
+@return index def. if found else NULL */
+UNIV_INLINE
+const ib_index_def_t*
+ib_table_find_index(
+/*================*/
+ ib_vector_t* indexes, /*!< in: vector of indexes */
+ const char* name) /*!< in: index name */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(indexes); ++i) {
+ const ib_index_def_t* index_def;
+
+ index_def = (ib_index_def_t*) ib_vector_get(indexes, i);
+
+ if (innobase_strcasecmp(name, index_def->name) == 0) {
+ return(index_def);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get the InnoDB internal precise type from the schema column definition.
+@return precise type in api format */
+UNIV_INLINE
+ulint
+ib_col_get_prtype(
+/*==============*/
+ const ib_col_t* ib_col) /*!< in: column definition */
+{
+ ulint prtype = 0;
+
+ if (ib_col->ib_col_attr & IB_COL_UNSIGNED) {
+ prtype |= DATA_UNSIGNED;
+
+ ut_a(ib_col->ib_col_type == IB_INT);
+ }
+
+ if (ib_col->ib_col_attr & IB_COL_NOT_NULL) {
+ prtype |= DATA_NOT_NULL;
+ }
+
+ return(prtype);
+}
+
+/*****************************************************************//**
+Get the InnoDB internal main type from the schema column definition.
+@return column main type */
+UNIV_INLINE
+ulint
+ib_col_get_mtype(
+/*==============*/
+ const ib_col_t* ib_col) /*!< in: column definition */
+{
+ /* Note: The api0api.h types should map directly to
+ the internal numeric codes. */
+ return(ib_col->ib_col_type);
+}
+
+/*****************************************************************//**
+Find a column in the the column vector with the same name.
+@return col. def. if found else NULL */
+UNIV_INLINE
+const ib_col_t*
+ib_table_find_col(
+/*==============*/
+ const ib_vector_t* cols, /*!< in: column list head */
+ const char* name) /*!< in: column name to find */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(cols); ++i) {
+ const ib_col_t* ib_col;
+
+ ib_col = static_cast<const ib_col_t*>(
+ ib_vector_get((ib_vector_t*) cols, i));
+
+ if (innobase_strcasecmp(ib_col->name, name) == 0) {
+ return(ib_col);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Find a column in the the column list with the same name.
+@return col. def. if found else NULL */
+UNIV_INLINE
+const ib_key_col_t*
+ib_index_find_col(
+/*==============*/
+ ib_vector_t* cols, /*!< in: column list head */
+ const char* name) /*!< in: column name to find */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(cols); ++i) {
+ const ib_key_col_t* ib_col;
+
+ ib_col = static_cast<ib_key_col_t*>(ib_vector_get(cols, i));
+
+ if (innobase_strcasecmp(ib_col->name, name) == 0) {
+ return(ib_col);
+ }
+ }
+
+ return(NULL);
+}
+
+#ifdef __WIN__
+/*****************************************************************//**
+Convert a string to lower case. */
+static
+void
+ib_to_lower_case(
+/*=============*/
+ char* ptr) /*!< string to convert to lower case */
+{
+ while (*ptr) {
+ *ptr = tolower(*ptr);
+ ++ptr;
+ }
+}
+#endif /* __WIN__ */
+
+/*****************************************************************//**
+Normalizes a table name string. A normalized name consists of the
+database name catenated to '/' and table name. An example:
+test/mytable. On Windows normalization puts both the database name and the
+table name always to lower case. This function can be called for system
+tables and they don't have a database component. For tables that don't have
+a database component, we don't normalize them to lower case on Windows.
+The assumption is that they are system tables that reside in the system
+table space. */
+static
+void
+ib_normalize_table_name(
+/*====================*/
+ char* norm_name, /*!< out: normalized name as a
+ null-terminated string */
+ const char* name) /*!< in: table name string */
+{
+ const char* ptr = name;
+
+ /* Scan name from the end */
+
+ ptr += ut_strlen(name) - 1;
+
+ /* Find the start of the table name. */
+ while (ptr >= name && *ptr != '\\' && *ptr != '/' && ptr > name) {
+ --ptr;
+ }
+
+
+ /* For system tables there is no '/' or dbname. */
+ ut_a(ptr >= name);
+
+ if (ptr > name) {
+ const char* db_name;
+ const char* table_name;
+
+ table_name = ptr + 1;
+
+ --ptr;
+
+ while (ptr >= name && *ptr != '\\' && *ptr != '/') {
+ ptr--;
+ }
+
+ db_name = ptr + 1;
+
+ memcpy(norm_name, db_name,
+ ut_strlen(name) + 1 - (db_name - name));
+
+ norm_name[table_name - db_name - 1] = '/';
+#ifdef __WIN__
+ ib_to_lower_case(norm_name);
+#endif
+ } else {
+ ut_strcpy(norm_name, name);
+ }
+}
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_table_name_check(
+/*================*/
+ const char* name) /*!< in: table name to check */
+{
+ const char* slash = NULL;
+ ulint len = ut_strlen(name);
+
+ if (len < 2
+ || *name == '/'
+ || name[len - 1] == '/'
+ || (name[0] == '.' && name[1] == '/')
+ || (name[0] == '.' && name[1] == '.' && name[2] == '/')) {
+
+ return(DB_DATA_MISMATCH);
+ }
+
+ for ( ; *name; ++name) {
+#ifdef __WIN__
+ /* Check for reserved characters in DOS filenames. */
+ switch (*name) {
+ case ':':
+ case '|':
+ case '"':
+ case '*':
+ case '<':
+ case '>':
+ return(DB_DATA_MISMATCH);
+ }
+#endif /* __WIN__ */
+ if (*name == '/') {
+ if (slash) {
+ return(DB_DATA_MISMATCH);
+ }
+ slash = name;
+ }
+ }
+
+ return(slash ? DB_SUCCESS : DB_DATA_MISMATCH);
+}
+
+
+
+/*****************************************************************//**
+Get an index definition that is tagged as a clustered index.
+@return cluster index schema */
+UNIV_INLINE
+ib_index_def_t*
+ib_find_clustered_index(
+/*====================*/
+ ib_vector_t* indexes) /*!< in: index defs. to search */
+{
+ ulint i;
+ ulint n_indexes;
+
+ n_indexes = ib_vector_size(indexes);
+
+ for (i = 0; i < n_indexes; ++i) {
+ ib_index_def_t* ib_index_def;
+
+ ib_index_def = static_cast<ib_index_def_t*>(
+ ib_vector_get(indexes, i));
+
+ if (ib_index_def->clustered) {
+ return(ib_index_def);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get a table id. The caller must have acquired the dictionary mutex.
+@return DB_SUCCESS if found */
+static
+ib_err_t
+ib_table_get_id_low(
+/*================*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id) /*!< out: table id if found */
+{
+ dict_table_t* table;
+ ib_err_t err = DB_TABLE_NOT_FOUND;
+
+ *table_id = 0;
+
+ table = ib_lookup_table_by_name(table_name);
+
+ if (table != NULL) {
+ *table_id = (table->id);
+
+ err = DB_SUCCESS;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an internal cursor instance.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_create_cursor(
+/*=============*/
+ ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
+ dict_table_t* table, /*!< in: table instance */
+ dict_index_t* index, /*!< in: index to use */
+ trx_t* trx) /*!< in: transaction */
+{
+ mem_heap_t* heap;
+ ib_cursor_t* cursor;
+ ib_err_t err = DB_SUCCESS;
+
+ heap = mem_heap_create(sizeof(*cursor) * 2);
+
+ if (heap != NULL) {
+ row_prebuilt_t* prebuilt;
+
+ cursor = static_cast<ib_cursor_t*>(
+ mem_heap_zalloc(heap, sizeof(*cursor)));
+
+ cursor->heap = heap;
+
+ cursor->query_heap = mem_heap_create(64);
+
+ if (cursor->query_heap == NULL) {
+ mem_heap_free(heap);
+
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ cursor->prebuilt = row_create_prebuilt(table, 0);
+
+ prebuilt = cursor->prebuilt;
+
+ prebuilt->trx = trx;
+
+ cursor->valid_trx = TRUE;
+
+ prebuilt->table = table;
+ prebuilt->select_lock_type = LOCK_NONE;
+ prebuilt->innodb_api = TRUE;
+
+ prebuilt->index = index;
+
+ ut_a(prebuilt->index != NULL);
+
+ if (prebuilt->trx != NULL) {
+ ++prebuilt->trx->n_mysql_tables_in_use;
+
+ prebuilt->index_usable =
+ row_merge_is_index_usable(
+ prebuilt->trx, prebuilt->index);
+
+ /* Assign a read view if the transaction does
+ not have it yet */
+
+ trx_assign_read_view(prebuilt->trx);
+ }
+
+ *ib_crsr = (ib_crsr_t) cursor;
+ } else {
+ err = DB_OUT_OF_MEMORY;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an internal cursor instance, and set prebuilt->index to index
+with supplied index_id.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_create_cursor_with_index_id(
+/*===========================*/
+ ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
+ dict_table_t* table, /*!< in: table instance */
+ ib_id_u64_t index_id, /*!< in: index id or 0 */
+ trx_t* trx) /*!< in: transaction */
+{
+ dict_index_t* index;
+
+ if (index_id != 0) {
+ mutex_enter(&dict_sys->mutex);
+ index = dict_index_find_on_id_low(index_id);
+ mutex_exit(&dict_sys->mutex);
+ } else {
+ index = dict_table_get_first_index(table);
+ }
+
+ return(ib_create_cursor(ib_crsr, table, index, trx));
+}
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+ ib_id_u64_t table_id, /*!< in: table id of table to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */
+{
+ ib_err_t err;
+ dict_table_t* table;
+
+ if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
+ table = ib_open_table_by_id(table_id, FALSE);
+ } else {
+ table = ib_open_table_by_id(table_id, TRUE);
+ }
+
+ if (table == NULL) {
+
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
+ (trx_t*) ib_trx);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr)	/*!< out: InnoDB cursor */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	/* The owning table's id is encoded in the UPPER 32 bits of
+	the 64-bit index id. */
+	ulint		table_id = (ulint)( index_id >> 32);
+
+	if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
+		table = ib_open_table_by_id(table_id, FALSE);
+	} else {
+		table = ib_open_table_by_id(table_id, TRUE);
+	}
+
+	if (table == NULL) {
+
+		return(DB_TABLE_NOT_FOUND);
+	}
+
+	/* NOTE(review): the full 64-bit index_id is passed through here;
+	the old remark about "lower 32 bits of the dulint" looks stale. */
+	err = ib_create_cursor_with_index_id(
+		ib_crsr, table, index_id, (trx_t*) ib_trx);
+
+	/* If the requested index was not found the cursor is created
+	with a NULL prebuilt->index; close it and return NULL to the
+	caller rather than a half-open cursor. */
+	if (ib_crsr != NULL) {
+		const ib_cursor_t*	cursor;
+
+		cursor = *(ib_cursor_t**) ib_crsr;
+
+		if (cursor->prebuilt->index == NULL) {
+			ib_err_t	crsr_err;
+
+			crsr_err = ib_cursor_close(*ib_crsr);
+			ut_a(crsr_err == DB_SUCCESS);
+
+			*ib_crsr = NULL;
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+	ib_crsr_t	ib_open_crsr,	/*!< in: open/active cursor */
+	const char*	index_name,	/*!< in: secondary index name */
+	ib_crsr_t*	ib_crsr,	/*!< out,own: InnoDB index cursor */
+	int*		idx_type,	/*!< out: index is cluster index */
+	ib_id_u64_t*	idx_id)		/*!< out: index id */
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+	index_id_t	index_id = 0;
+	ib_err_t	err = DB_TABLE_NOT_FOUND;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_open_crsr;
+
+	/* Initialise the out parameters for the not-found paths. */
+	*idx_type = 0;
+	*idx_id = 0;
+	*ib_crsr = NULL;
+
+	/* We want to increment the ref count, so we do a redundant search. */
+	table = dict_table_open_on_id(cursor->prebuilt->table->id,
+				      FALSE, FALSE);
+	ut_a(table != NULL);
+
+	/* The first index is always the cluster index. */
+	index = dict_table_get_first_index(table);
+
+	/* Traverse the user defined indexes. */
+	while (index != NULL) {
+		if (innobase_strcasecmp(index->name, index_name) == 0) {
+			index_id = index->id;
+			*idx_type = index->type;
+			*idx_id = index_id;
+			break;
+		}
+		index = UT_LIST_GET_NEXT(indexes, index);
+	}
+
+	/* Name not found: release the extra table reference taken above. */
+	if (!index_id) {
+		dict_table_close(table, FALSE, FALSE);
+		return(DB_ERROR);
+	}
+
+	/* NOTE(review): this check is always true after the early return
+	above; kept as-is, but it is effectively redundant. */
+	if (index_id > 0) {
+		ut_ad(index->id == index_id);
+		err = ib_create_cursor(
+			ib_crsr, table, index, cursor->prebuilt->trx);
+	}
+
+	/* Defensive: a cursor whose prebuilt has no index is unusable;
+	close it and hand back NULL. */
+	if (*ib_crsr != NULL) {
+		const ib_cursor_t*	cursor;
+
+		cursor = *(ib_cursor_t**) ib_crsr;
+
+		if (cursor->prebuilt->index == NULL) {
+			err = ib_cursor_close(*ib_crsr);
+			ut_a(err == DB_SUCCESS);
+			*ib_crsr = NULL;
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+	const char*	name,		/*!< in: table name */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr)	/*!< out,own: InnoDB cursor */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	char*		normalized_name;
+
+	/* NOTE(review): mem_alloc() result is used unchecked here;
+	presumably it aborts on OOM -- confirm against ut0mem. */
+	normalized_name = static_cast<char*>(mem_alloc(ut_strlen(name) + 1));
+	ib_normalize_table_name(normalized_name, name);
+
+	if (ib_trx != NULL) {
+		if (!ib_schema_lock_is_exclusive(ib_trx)) {
+			table = (dict_table_t*)ib_open_table_by_name(
+				normalized_name);
+		} else {
+			/* NOTE: We do not acquire MySQL metadata lock */
+			table = ib_lookup_table_by_name(normalized_name);
+		}
+	} else {
+		table = (dict_table_t*)ib_open_table_by_name(normalized_name);
+	}
+
+	mem_free(normalized_name);
+	normalized_name = NULL;
+
+	/* It can happen that another thread has created the table but
+	not the cluster index or it's a broken table definition. Refuse to
+	open if that's the case. */
+	if (table != NULL && dict_table_get_first_index(table) == NULL) {
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
+						     (trx_t*) ib_trx);
+	} else {
+		err = DB_TABLE_NOT_FOUND;
+	}
+
+	return(err);
+}
+
+/********************************************************************//**
+Free a context struct for a table handle. Releases the three cached
+query graphs (insert/update/select) and zeroes the whole struct so it
+can be lazily rebuilt. */
+static
+void
+ib_qry_proc_free(
+/*=============*/
+	ib_qry_proc_t*	q_proc)		/*!< in, own: qproc struct */
+{
+	/* que_graph_free_recursive() presumably tolerates NULL graphs
+	that were never built -- TODO confirm. */
+	que_graph_free_recursive(q_proc->grph.ins);
+	que_graph_free_recursive(q_proc->grph.upd);
+	que_graph_free_recursive(q_proc->grph.sel);
+
+	memset(q_proc, 0x0, sizeof(*q_proc));
+}
+
+/*****************************************************************//**
+set a cursor trx to NULL. Detaches the cursor from its transaction;
+the trx itself is not committed or rolled back here. */
+UNIV_INTERN
+void
+ib_cursor_clear_trx(
+/*================*/
+	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	cursor->prebuilt->trx = NULL;
+}
+
+/*****************************************************************//**
+Reset the cursor: releases the table-in-use count on the attached
+transaction (if any) and frees the cached query graphs and query heap
+so the cursor can be reused.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_reset(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Only decrement when the trx is still attached and actually
+	counts this cursor among its open MySQL tables. */
+	if (cursor->valid_trx && prebuilt->trx != NULL
+	    && prebuilt->trx->n_mysql_tables_in_use > 0) {
+
+		--prebuilt->trx->n_mysql_tables_in_use;
+	}
+
+	/* The fields in this data structure are allocated from
+	the query heap and so need to be reset too. */
+	ib_qry_proc_free(&cursor->q_proc);
+
+	mem_heap_empty(cursor->query_heap);
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+update the cursor with new transactions and also reset the cursor
+@return DB_SUCCESS or err code */
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx)		/*!< in: transaction */
+{
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	trx_t*		trx = (trx_t*) ib_trx;
+
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Swap the prebuilt struct over to the new transaction. */
+	row_update_prebuilt_trx(prebuilt, trx);
+
+	cursor->valid_trx = TRUE;
+
+	/* Give the new trx a consistent read view for this cursor. */
+	trx_assign_read_view(prebuilt->trx);
+
+	/* Cached query graphs reference the old trx; rebuild lazily. */
+	ib_qry_proc_free(&cursor->q_proc);
+
+	mem_heap_empty(cursor->query_heap);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Commit the transaction in a cursor and detach it from the cursor.
+After this call the cursor has no transaction (prebuilt->trx == NULL).
+@return DB_SUCCESS or err code */
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx)		/*!< in: transaction */
+{
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* The caller must pass the same trx the cursor is attached to. */
+	ut_ad(prebuilt->trx == (trx_t*) ib_trx);
+	err = ib_trx_commit(ib_trx);
+	prebuilt->trx = NULL;
+	cursor->valid_trx = FALSE;
+	return(err);
+}
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor. Frees the query graphs,
+the prebuilt struct and both cursor heaps. NULL is accepted and is a
+no-op.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_close(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt;
+	trx_t*		trx;
+
+	if (!cursor) {
+		return(DB_SUCCESS);
+	}
+
+	prebuilt = cursor->prebuilt;
+	trx = prebuilt->trx;
+
+	ib_qry_proc_free(&cursor->q_proc);
+
+	/* The transaction could have been detached from the cursor. */
+	if (cursor->valid_trx && trx != NULL
+	    && trx->n_mysql_tables_in_use > 0) {
+		--trx->n_mysql_tables_in_use;
+	}
+
+	row_prebuilt_free(prebuilt, FALSE);
+	cursor->prebuilt = NULL;
+
+	/* NOTE(review): cursor itself is presumably allocated from
+	cursor->heap, so the second mem_heap_free() releases it --
+	confirm against ib_create_cursor(). The trailing NULL store is
+	a dead assignment to a local. */
+	mem_heap_free(cursor->query_heap);
+	mem_heap_free(cursor->heap);
+	cursor = NULL;
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count. The cursor itself stays
+open; only the dictionary table reference taken at open time is
+released.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	if (prebuilt && prebuilt->table) {
+		dict_table_close(prebuilt->table, FALSE, FALSE);
+	}
+
+	return(DB_SUCCESS);
+}
+/**********************************************************************//**
+Run the insert query and do error handling. Re-runs the insert node
+whenever ib_handle_errors() reports that the error was a lock wait
+that has been resolved (rolling back to the given savepoint on hard
+errors).
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_insert_row_with_lock_retry(
+/*==========================*/
+	que_thr_t*	thr,		/*!< in: insert query graph */
+	ins_node_t*	node,		/*!< in: insert node for the query */
+	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
+					in case of an error */
+{
+	trx_t*		trx;
+	ib_err_t	err;
+	ib_bool_t	lock_wait;
+
+	trx = thr_get_trx(thr);
+
+	do {
+		/* Reset the thread to (re-)execute the insert node. */
+		thr->run_node = node;
+		thr->prev_node = node;
+
+		row_ins_step(thr);
+
+		err = trx->error_state;
+
+		if (err != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			/* Mark that we may be waiting for a row lock while
+			ib_handle_errors() decides whether to retry. */
+			thr->lock_state = QUE_THR_LOCK_ROW;
+			lock_wait = ib_handle_errors(&err, trx, thr, savept);
+			thr->lock_state = QUE_THR_LOCK_NOLOCK;
+		} else {
+			lock_wait = FALSE;
+		}
+	} while (lock_wait);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Write a row. Takes a savepoint, runs the insert graph with lock-wait
+retry, and on success updates the table row count and server insert
+statistics.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_execute_insert_query_graph(
+/*==========================*/
+	dict_table_t*	table,		/*!< in: table where to insert */
+	que_fork_t*	ins_graph,	/*!< in: query graph */
+	ins_node_t*	node)		/*!< in: insert node */
+{
+	trx_t*		trx;
+	que_thr_t*	thr;
+	trx_savept_t	savept;
+	ib_err_t	err = DB_SUCCESS;
+
+	trx = ins_graph->trx;
+
+	/* Savepoint so a failed insert can be rolled back cleanly. */
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(ins_graph);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	err = ib_insert_row_with_lock_retry(thr, node, &savept);
+
+	if (err == DB_SUCCESS) {
+		que_thr_stop_for_mysql_no_error(thr, trx);
+
+		dict_table_n_rows_inc(table);
+
+		srv_stats.n_rows_inserted.inc();
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*****************************************************************//**
+Create an insert query graph node. Lazily builds (and caches on the
+cursor) an INS_DIRECT insert node plus its query graph; subsequent
+calls are no-ops until ib_qry_proc_free() clears the cache. */
+static
+void
+ib_insert_query_graph_create(
+/*==========================*/
+	ib_cursor_t*	cursor)		/*!< in: Cursor instance */
+{
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+	ib_qry_node_t*	node = &q_proc->node;
+	trx_t*		trx = cursor->prebuilt->trx;
+
+	/* The transaction must already be started. */
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	if (node->ins == NULL) {
+		dtuple_t*	row;
+		ib_qry_grph_t*	grph = &q_proc->grph;
+		mem_heap_t*	heap = cursor->query_heap;
+		dict_table_t*	table = cursor->prebuilt->table;
+
+		node->ins = ins_node_create(INS_DIRECT, table, heap);
+
+		node->ins->select = NULL;
+		node->ins->values_list = NULL;
+
+		/* Template row matching the table's column types; the
+		caller fills it before each insert. */
+		row = dtuple_create(heap, dict_table_get_n_cols(table));
+		dict_table_copy_types(row, table);
+
+		ins_node_set_new_row(node->ins, row);
+
+		grph->ins = static_cast<que_fork_t*>(
+			que_node_get_parent(
+				pars_complete_graph_for_exec(node->ins, trx,
+							     heap)));
+
+		grph->ins->state = QUE_FORK_ACTIVE;
+	}
+}
+
+/*****************************************************************//**
+Insert a row to a table. Shallow-copies the user tuple's fields into
+the cached insert node's row (validating NOT NULL constraints on the
+way) and executes the insert graph.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor instance */
+	const ib_tpl_t	ib_tpl)		/*!< in: tuple to insert */
+{
+	ib_ulint_t	i;
+	ib_qry_node_t*	node;
+	ib_qry_proc_t*	q_proc;
+	ulint		n_fields;
+	dtuple_t*	dst_dtuple;
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	const ib_tuple_t* src_tuple = (const ib_tuple_t*) ib_tpl;
+
+	/* Ensure the cached insert node/graph exists. */
+	ib_insert_query_graph_create(cursor);
+
+	ut_ad(src_tuple->type == TPL_TYPE_ROW);
+
+	q_proc = &cursor->q_proc;
+	node = &q_proc->node;
+
+	node->ins->state = INS_NODE_ALLOC_ROW_ID;
+	dst_dtuple = node->ins->row;
+
+	n_fields = dtuple_get_n_fields(src_tuple->ptr);
+	ut_ad(n_fields == dtuple_get_n_fields(dst_dtuple));
+
+	/* Do a shallow copy of the data fields and check for NULL
+	constraints on columns. */
+	for (i = 0; i < n_fields; i++) {
+		ulint		mtype;
+		dfield_t*	src_field;
+		dfield_t*	dst_field;
+
+		src_field = dtuple_get_nth_field(src_tuple->ptr, i);
+
+		mtype = dtype_get_mtype(dfield_get_type(src_field));
+
+		/* Don't touch the system columns. */
+		if (mtype != DATA_SYS) {
+			ulint	prtype;
+
+			prtype = dtype_get_prtype(dfield_get_type(src_field));
+
+			/* Reject NULL in a NOT NULL column. */
+			if ((prtype & DATA_NOT_NULL)
+			    && dfield_is_null(src_field)) {
+
+				err = DB_DATA_MISMATCH;
+				break;
+			}
+
+			dst_field = dtuple_get_nth_field(dst_dtuple, i);
+			ut_ad(mtype
+			      == dtype_get_mtype(dfield_get_type(dst_field)));
+
+			/* Do a shallow copy. */
+			dfield_set_data(
+				dst_field, src_field->data, src_field->len);
+
+			if (dst_field->len != IB_SQL_NULL) {
+				UNIV_MEM_ASSERT_RW(dst_field->data,
+						   dst_field->len);
+			}
+		}
+	}
+
+	if (err == DB_SUCCESS) {
+		err = ib_execute_insert_query_graph(
+			src_tuple->index->table, q_proc->grph.ins, node->ins);
+	}
+
+	return(err);
+}
+
+/*********************************************************************//**
+Gets pointer to a prebuilt update vector used in updates. Lazily
+creates the update node, then (re-)builds the update query graph.
+@return update vector */
+UNIV_INLINE
+upd_t*
+ib_update_vector_create(
+/*====================*/
+	ib_cursor_t*	cursor)		/*!< in: current cursor */
+{
+	trx_t*		trx = cursor->prebuilt->trx;
+	mem_heap_t*	heap = cursor->query_heap;
+	dict_table_t*	table = cursor->prebuilt->table;
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+	ib_qry_grph_t*	grph = &q_proc->grph;
+	ib_qry_node_t*	node = &q_proc->node;
+
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	if (node->upd == NULL) {
+		node->upd = static_cast<upd_node_t*>(
+			row_create_update_node_for_mysql(table, heap));
+	}
+
+	/* NOTE(review): unlike node->upd, grph->upd is rebuilt on every
+	call (allocating from the query heap); the heap is only emptied
+	on reset/close -- confirm this is intentional. */
+	grph->upd = static_cast<que_fork_t*>(
+		que_node_get_parent(
+			pars_complete_graph_for_exec(node->upd, trx, heap)));
+
+	grph->upd->state = QUE_FORK_ACTIVE;
+
+	return(node->upd->update);
+}
+
+/**********************************************************************//**
+Note that a column has changed: fills one update-vector field with the
+new value (or NULL) and the column's position in the clustered index. */
+static
+void
+ib_update_col(
+/*==========*/
+
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	upd_field_t*	upd_field,	/*!< in/out: update field */
+	ulint		col_no,		/*!< in: column number */
+	dfield_t*	dfield)		/*!< in: updated dfield */
+{
+	ulint		data_len;
+	dict_table_t*	table = cursor->prebuilt->table;
+	dict_index_t*	index = dict_table_get_first_index(table);
+
+	data_len = dfield_get_len(dfield);
+
+	if (data_len == UNIV_SQL_NULL) {
+		dfield_set_null(&upd_field->new_val);
+	} else {
+		dfield_copy_data(&upd_field->new_val, dfield);
+	}
+
+	upd_field->exp = NULL;
+
+	upd_field->orig_len = 0;
+
+	/* Field position is relative to the clustered (first) index. */
+	upd_field->field_no = dict_col_get_clust_pos(
+		&table->cols[col_no], index);
+}
+
+/**********************************************************************//**
+Checks which fields have changed in a row and stores the new data
+to an update vector. Skips system columns, rejects NULL in NOT NULL
+columns, and records a field when either its length or its bytes
+differ between old and new.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_calc_diff(
+/*=========*/
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	upd_t*		upd,		/*!< in/out: update vector */
+	const ib_tuple_t*old_tuple,	/*!< in: Old tuple in table */
+	const ib_tuple_t*new_tuple)	/*!< in: New tuple to update */
+{
+	ulint		i;
+	ulint		n_changed = 0;
+	ib_err_t	err = DB_SUCCESS;
+	ulint		n_fields = dtuple_get_n_fields(new_tuple->ptr);
+
+	ut_a(old_tuple->type == TPL_TYPE_ROW);
+	ut_a(new_tuple->type == TPL_TYPE_ROW);
+	/* Both tuples must describe the same table. */
+	ut_a(old_tuple->index->table == new_tuple->index->table);
+
+	for (i = 0; i < n_fields; ++i) {
+		ulint		mtype;
+		ulint		prtype;
+		upd_field_t*	upd_field;
+		dfield_t*	new_dfield;
+		dfield_t*	old_dfield;
+
+		new_dfield = dtuple_get_nth_field(new_tuple->ptr, i);
+		old_dfield = dtuple_get_nth_field(old_tuple->ptr, i);
+
+		mtype = dtype_get_mtype(dfield_get_type(old_dfield));
+		prtype = dtype_get_prtype(dfield_get_type(old_dfield));
+
+		/* Skip the system columns */
+		if (mtype == DATA_SYS) {
+			continue;
+
+		} else if ((prtype & DATA_NOT_NULL)
+			   && dfield_is_null(new_dfield)) {
+
+			err = DB_DATA_MISMATCH;
+			break;
+		}
+
+		/* Changed if length differs, or (both non-NULL of equal
+		length and) the bytes differ. */
+		if (dfield_get_len(new_dfield) != dfield_get_len(old_dfield)
+		    || (!dfield_is_null(old_dfield)
+		        && memcmp(dfield_get_data(new_dfield),
+				  dfield_get_data(old_dfield),
+				  dfield_get_len(old_dfield)) != 0)) {
+
+			upd_field = &upd->fields[n_changed];
+
+			ib_update_col(cursor, upd_field, i, new_dfield);
+
+			++n_changed;
+		}
+	}
+
+	if (err == DB_SUCCESS) {
+		upd->info_bits = 0;
+		upd->n_fields = n_changed;
+	}
+
+	return(err);
+}
+
+/**********************************************************************//**
+Run the update query and do error handling. Like the insert variant,
+but DB_RECORD_NOT_FOUND is passed straight back to the caller without
+going through ib_handle_errors().
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_update_row_with_lock_retry(
+/*==========================*/
+	que_thr_t*	thr,		/*!< in: Update query graph */
+	upd_node_t*	node,		/*!< in: Update node for the query */
+	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
+					in case of an error */
+
+{
+	trx_t*		trx;
+	ib_err_t	err;
+	ib_bool_t	lock_wait;
+
+	trx = thr_get_trx(thr);
+
+	do {
+		/* Reset the thread to (re-)execute the update node. */
+		thr->run_node = node;
+		thr->prev_node = node;
+
+		row_upd_step(thr);
+
+		err = trx->error_state;
+
+		if (err != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			if (err != DB_RECORD_NOT_FOUND) {
+				thr->lock_state = QUE_THR_LOCK_ROW;
+
+				lock_wait = ib_handle_errors(
+					&err, trx, thr, savept);
+
+				thr->lock_state = QUE_THR_LOCK_NOLOCK;
+			} else {
+				lock_wait = FALSE;
+			}
+		} else {
+			lock_wait = FALSE;
+		}
+	} while (lock_wait);
+
+	return(err);
+}
+
+/*********************************************************************//**
+Does an update or delete of a row (node->is_delete selects which).
+Copies the caller's positioned clustered-index cursor into the update
+node, runs the graph with lock-wait retry, and maintains row counts
+and server statistics.
+@return DB_SUCCESS or err code */
+UNIV_INLINE
+ib_err_t
+ib_execute_update_query_graph(
+/*==========================*/
+	ib_cursor_t*	cursor,		/*!< in: Cursor instance */
+	btr_pcur_t*	pcur)		/*!< in: Btree persistent cursor */
+{
+	ib_err_t	err;
+	que_thr_t*	thr;
+	upd_node_t*	node;
+	trx_savept_t	savept;
+	trx_t*		trx = cursor->prebuilt->trx;
+	dict_table_t*	table = cursor->prebuilt->table;
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+
+	/* The transaction must be running. */
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	node = q_proc->node.upd;
+
+	/* Updates/deletes must go through the clustered index and the
+	cursor must be positioned exactly on the record. */
+	ut_a(dict_index_is_clust(pcur->btr_cur.index));
+	btr_pcur_copy_stored_position(node->pcur, pcur);
+
+	ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
+
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(q_proc->grph.upd);
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	err = ib_update_row_with_lock_retry(thr, node, &savept);
+
+	if (err == DB_SUCCESS) {
+
+		que_thr_stop_for_mysql_no_error(thr, trx);
+
+		if (node->is_delete) {
+
+			dict_table_n_rows_dec(table);
+
+			srv_stats.n_rows_deleted.inc();
+		} else {
+			srv_stats.n_rows_updated.inc();
+		}
+
+	} else if (err == DB_RECORD_NOT_FOUND) {
+		/* Not-found is reported to the caller but must not leave
+		the trx in an error state. */
+		trx->error_state = DB_SUCCESS;
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*****************************************************************//**
+Update a row in a table. Diffs old vs. new tuple into an update
+vector and executes the update graph over the clustered-index cursor.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	const ib_tpl_t	ib_old_tpl,	/*!< in: Old tuple in table */
+	const ib_tpl_t	ib_new_tpl)	/*!< in: New tuple to update */
+{
+	upd_t*		upd;
+	ib_err_t	err;
+	btr_pcur_t*	pcur;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	const ib_tuple_t*old_tuple = (const ib_tuple_t*) ib_old_tpl;
+	const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl;
+
+	/* Pick the clustered-index cursor: directly for a clustered
+	cursor, via clust_pcur for a secondary-index cursor. */
+	if (dict_index_is_clust(prebuilt->index)) {
+		pcur = &cursor->prebuilt->pcur;
+	} else if (prebuilt->need_to_access_clustered) {
+		pcur = &cursor->prebuilt->clust_pcur;
+	} else {
+		return(DB_ERROR);
+	}
+
+	ut_a(old_tuple->type == TPL_TYPE_ROW);
+	ut_a(new_tuple->type == TPL_TYPE_ROW);
+
+	upd = ib_update_vector_create(cursor);
+
+	err = ib_calc_diff(cursor, upd, old_tuple, new_tuple);
+
+	if (err == DB_SUCCESS) {
+		/* Note that this is not a delete. */
+		cursor->q_proc.node.upd->is_delete = FALSE;
+
+		err = ib_execute_update_query_graph(cursor, pcur);
+	}
+
+	return(err);
+}
+
+/**********************************************************************//**
+Build the update query graph to delete a row from an index. Reads the
+record into a key tuple, fills the update vector with the current
+values, then executes the graph with is_delete set.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_delete_row(
+/*==========*/
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	btr_pcur_t*	pcur,		/*!< in: Btree persistent cursor */
+	const rec_t*	rec)		/*!< in: record to delete */
+{
+	ulint		i;
+	upd_t*		upd;
+	ib_err_t	err;
+	ib_tuple_t*	tuple;
+	ib_tpl_t	ib_tpl;
+	ulint		n_cols;
+	upd_field_t*	upd_field;
+	ib_bool_t	page_format;
+	dict_table_t*	table = cursor->prebuilt->table;
+	dict_index_t*	index = dict_table_get_first_index(table);
+
+	n_cols = dict_index_get_n_ordering_defined_by_user(index);
+	ib_tpl = ib_key_tuple_new(index, n_cols);
+
+	if (!ib_tpl) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	tuple = (ib_tuple_t*) ib_tpl;
+
+	upd = ib_update_vector_create(cursor);
+
+	page_format = dict_table_is_comp(index->table);
+	ib_read_tuple(rec, page_format, tuple);
+
+	upd->n_fields = ib_tuple_get_n_cols(ib_tpl);
+
+	for (i = 0; i < upd->n_fields; ++i) {
+		dfield_t*	dfield;
+
+		upd_field = &upd->fields[i];
+		dfield = dtuple_get_nth_field(tuple->ptr, i);
+
+		dfield_copy_data(&upd_field->new_val, dfield);
+
+		upd_field->exp = NULL;
+
+		upd_field->orig_len = 0;
+
+		/* NOTE(review): loop-invariant; could be hoisted out. */
+		upd->info_bits = 0;
+
+		upd_field->field_no = dict_col_get_clust_pos(
+			&table->cols[i], index);
+	}
+
+	/* Note that this is a delete. */
+	cursor->q_proc.node.upd->is_delete = TRUE;
+
+	err = ib_execute_update_query_graph(cursor, pcur);
+
+	ib_tuple_delete(ib_tpl);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Delete a row in a table. Restores the persistent cursor position,
+re-reads the record, and deletes it unless it is already delete-marked.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_err_t	err;
+	btr_pcur_t*	pcur;
+	dict_index_t*	index;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* First index of the table == clustered index. */
+	index = dict_table_get_first_index(prebuilt->index->table);
+
+	/* Check whether this is a secondary index cursor */
+	if (index != prebuilt->index) {
+		if (prebuilt->need_to_access_clustered) {
+			pcur = &prebuilt->clust_pcur;
+		} else {
+			return(DB_ERROR);
+		}
+	} else {
+		pcur = &prebuilt->pcur;
+	}
+
+	if (ib_btr_cursor_is_positioned(pcur)) {
+		const rec_t*	rec;
+		ib_bool_t	page_format;
+		mtr_t		mtr;
+
+		page_format = dict_table_is_comp(index->table);
+
+		mtr_start(&mtr);
+
+		if (btr_pcur_restore_position(
+			BTR_SEARCH_LEAF, pcur, &mtr)) {
+
+			rec = btr_pcur_get_rec(pcur);
+		} else {
+			rec = NULL;
+		}
+
+		mtr_commit(&mtr);
+
+		/* Skip records already delete-marked by someone else. */
+		if (rec && !rec_get_deleted_flag(rec, page_format)) {
+			err = ib_delete_row(cursor, pcur, rec);
+		} else {
+			err = DB_RECORD_NOT_FOUND;
+		}
+	} else {
+		err = DB_RECORD_NOT_FOUND;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Read current row. Restores the persistent cursor position inside a
+mini-transaction and copies the record into the caller's tuple.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl)		/*!< out: read cols into this tuple */
+{
+	ib_err_t	err;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+
+	/* When searching with IB_EXACT_MATCH set, row_search_for_mysql()
+	will not position the persistent cursor but will copy the record
+	found into the row cache. It should be the only entry. */
+	if (!ib_cursor_is_positioned(ib_crsr) ) {
+		err = DB_RECORD_NOT_FOUND;
+	} else {
+		mtr_t		mtr;
+		btr_pcur_t*	pcur;
+		row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+		/* Full-row reads through a secondary index go via the
+		clustered-index cursor. */
+		if (prebuilt->need_to_access_clustered
+		    && tuple->type == TPL_TYPE_ROW) {
+			pcur = &prebuilt->clust_pcur;
+		} else {
+			pcur = &prebuilt->pcur;
+		}
+
+		if (pcur == NULL) {
+			return(DB_ERROR);
+		}
+
+		mtr_start(&mtr);
+
+		if (btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr)) {
+			const rec_t*	rec;
+			ib_bool_t	page_format;
+
+			page_format = dict_table_is_comp(tuple->index->table);
+			rec = btr_pcur_get_rec(pcur);
+
+			/* Prefer the record cached by the API layer if it
+			differs from the one under the cursor. */
+			if (prebuilt->innodb_api_rec &&
+			    prebuilt->innodb_api_rec != rec) {
+				rec = prebuilt->innodb_api_rec;
+			}
+
+			if (!rec_get_deleted_flag(rec, page_format)) {
+				ib_read_tuple(rec, page_format, tuple);
+				err = DB_SUCCESS;
+			} else{
+				err = DB_RECORD_NOT_FOUND;
+			}
+
+		} else {
+			err = DB_RECORD_NOT_FOUND;
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Move cursor to the first record in the table (direction chosen by the
+search mode). Uses an empty search tuple so row_search_for_mysql()
+positions at one of the index ends.
+@return DB_SUCCESS or err code */
+UNIV_INLINE
+ib_err_t
+ib_cursor_position(
+/*===============*/
+	ib_cursor_t*	cursor,		/*!< in: InnoDB cursor instance */
+	ib_srch_mode_t	mode)		/*!< in: Search mode */
+{
+	ib_err_t	err;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	unsigned char*	buf;
+
+	/* Scratch record buffer for row_search_for_mysql(); result is
+	not read back here. NOTE(review): mem_alloc() unchecked --
+	presumably aborts on OOM. */
+	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+
+	/* We want to position at one of the ends, row_search_for_mysql()
+	uses the search_tuple fields to work out what to do. */
+	dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, mode, prebuilt, 0, 0));
+
+	mem_free(buf);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Move cursor to the first record in the table (IB_CUR_G: ascending
+from the index start).
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_first(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	return(ib_cursor_position(cursor, IB_CUR_G));
+}
+
+/*****************************************************************//**
+Move cursor to the last record in the table (IB_CUR_L: descending
+from the index end).
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_last(
+/*===========*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	return(ib_cursor_position(cursor, IB_CUR_L));
+}
+
+/*****************************************************************//**
+Move cursor to the next user record in the table.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_next(
+/*===========*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_err_t	err;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	/* NOTE(review): large on-stack scratch buffer (max page size);
+	unlike ib_cursor_position() this one avoids the heap. */
+	byte		buf[UNIV_PAGE_SIZE_MAX];
+
+	/* We want to move to the next record */
+	dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT));
+
+	return(err);
+}
+
+/*****************************************************************//**
+Search for key. Shallow-copies the key tuple into the prebuilt search
+tuple and runs row_search_for_mysql() with the cursor's match mode.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl,		/*!< in: Key to search for */
+	ib_srch_mode_t	ib_srch_mode)	/*!< in: search mode */
+{
+	ulint		i;
+	ulint		n_fields;
+	ib_err_t	err = DB_SUCCESS;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	dtuple_t*	search_tuple = prebuilt->search_tuple;
+	unsigned char*	buf;
+
+	/* Only key tuples may be used for searching. */
+	ut_a(tuple->type == TPL_TYPE_KEY);
+
+	n_fields = dict_index_get_n_ordering_defined_by_user(prebuilt->index);
+
+	dtuple_set_n_fields(search_tuple, n_fields);
+	dtuple_set_n_fields_cmp(search_tuple, n_fields);
+
+	/* Do a shallow copy */
+	for (i = 0; i < n_fields; ++i) {
+		dfield_copy(dtuple_get_nth_field(search_tuple, i),
+			    dtuple_get_nth_field(tuple->ptr, i));
+	}
+
+	ut_a(prebuilt->select_lock_type <= LOCK_NUM);
+
+	/* Invalidate any record previously cached by the API layer. */
+	prebuilt->innodb_api_rec = NULL;
+
+	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, ib_srch_mode, prebuilt, cursor->match_mode, 0));
+
+	mem_free(buf);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Set the cursor search mode. Affects subsequent ib_cursor_moveto()
+calls only. */
+UNIV_INTERN
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+	ib_crsr_t	ib_crsr,	/*!< in: Cursor instance */
+	ib_match_mode_t	match_mode)	/*!< in: ib_cursor_moveto match mode */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	cursor->match_mode = match_mode;
+}
+
+/*****************************************************************//**
+Get the dfield instance for the column in the tuple. Thin accessor;
+no bounds checking beyond what dtuple_get_nth_field() does.
+@return dfield instance in tuple */
+UNIV_INLINE
+dfield_t*
+ib_col_get_dfield(
+/*==============*/
+	ib_tuple_t*	tuple,		/*!< in: tuple instance */
+	ulint		col_no)		/*!< in: col no. in tuple */
+{
+	dfield_t*	dfield;
+
+	dfield = dtuple_get_nth_field(tuple->ptr, col_no);
+
+	return(dfield);
+}
+
+/*****************************************************************//**
+Predicate to check whether a column type contains variable length data
+with a declared maximum length.
+NOTE(review): despite the ib_err_t return type this is used as a
+boolean (nonzero = capped).
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_col_is_capped(
+/*==============*/
+	const dtype_t*  dtype)		/*!< in: column type */
+{
+	return(static_cast<ib_err_t>(
+		(dtype_get_mtype(dtype) == DATA_VARCHAR
+		 || dtype_get_mtype(dtype) == DATA_CHAR
+		 || dtype_get_mtype(dtype) == DATA_MYSQL
+		 || dtype_get_mtype(dtype) == DATA_VARMYSQL
+		 || dtype_get_mtype(dtype) == DATA_FIXBINARY
+		 || dtype_get_mtype(dtype) == DATA_BINARY)
+	       && dtype_get_len(dtype) > 0));
+}
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+Validates the value against the column's type (length for INT,
+FLOAT, DOUBLE; padding for CHAR) and rejects writes to system columns.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_col_set_value(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	col_no,		/*!< in: column index in tuple */
+	const void*	src,		/*!< in: data value */
+	ib_ulint_t	len)		/*!< in: data value len */
+{
+	const dtype_t*  dtype;
+	dfield_t*	dfield;
+	void*		dst = NULL;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* User wants to set the column to NULL. */
+	if (len == IB_SQL_NULL) {
+		dfield_set_null(dfield);
+		return(DB_SUCCESS);
+	}
+
+	dtype = dfield_get_type(dfield);
+
+	/* Not allowed to update system columns. */
+	if (dtype_get_mtype(dtype) == DATA_SYS) {
+		return(DB_DATA_MISMATCH);
+	}
+
+	dst = dfield_get_data(dfield);
+
+	/* Since TEXT/CLOB also map to DATA_VARCHAR we need to make an
+	exception. Perhaps we need to set the precise type and check
+	for that. */
+	if (ib_col_is_capped(dtype)) {
+
+		/* Clamp to the declared column length and allocate the
+		full declared length so CHAR padding below fits. */
+		len = ut_min(len, dtype_get_len(dtype));
+
+		if (dst == NULL || len > dfield_get_len(dfield)) {
+			dst = mem_heap_alloc(tuple->heap, dtype_get_len(dtype));
+			ut_a(dst != NULL);
+		}
+	} else if (dst == NULL || len > dfield_get_len(dfield)) {
+		dst = mem_heap_alloc(tuple->heap, len);
+	}
+
+	if (dst == NULL) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	switch (dtype_get_mtype(dtype)) {
+	case DATA_INT: {
+
+		/* Integer length must match the column exactly. */
+		if (dtype_get_len(dtype) == len) {
+			ibool		usign;
+
+			usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+			mach_write_int_type(static_cast<byte*>(dst),
+					    static_cast<const byte*>(src),
+					    len, usign);
+
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+	}
+
+	case DATA_FLOAT:
+		if (len == sizeof(float)) {
+			mach_float_write(static_cast<byte*>(dst), *(float*)src);
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+
+	case DATA_DOUBLE:
+		if (len == sizeof(double)) {
+			mach_double_write(static_cast<byte*>(dst),
+					  *(double*)src);
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+
+	case DATA_SYS:
+		/* Already rejected above; unreachable. */
+		ut_error;
+		break;
+
+	case DATA_CHAR: {
+		ulint	pad_char = ULINT_UNDEFINED;
+
+		pad_char = dtype_get_pad_char(
+			dtype_get_mtype(dtype), dtype_get_prtype(dtype));
+
+		ut_a(pad_char != ULINT_UNDEFINED);
+
+		/* Space-pad the tail up to the fixed column length. */
+		memset((byte*) dst + len,
+		       pad_char,
+		       dtype_get_len(dtype) - len);
+
+		memcpy(dst, src, len);
+
+		len = dtype_get_len(dtype);
+		break;
+	}
+	case DATA_BLOB:
+	case DATA_BINARY:
+	case DATA_MYSQL:
+	case DATA_DECIMAL:
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+	case DATA_FIXBINARY:
+		memcpy(dst, src, len);
+		break;
+
+	default:
+		ut_error;
+	}
+
+	/* Point the dfield at the (possibly new) buffer. */
+	if (dst != dfield_get_data(dfield)) {
+		dfield_set_data(dfield, dst, len);
+	} else {
+		dfield_set_len(dfield, len);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Get the size of the data available in a column of the tuple,
+translating InnoDB's internal NULL marker to the API's IB_SQL_NULL.
+@return bytes avail or IB_SQL_NULL */
+UNIV_INTERN
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i)		/*!< in: column index in tuple */
+{
+	const dfield_t*	dfield;
+	ulint		data_len;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, i);
+
+	data_len = dfield_get_len(dfield);
+
+	return(data_len == UNIV_SQL_NULL ? IB_SQL_NULL : data_len);
+}
+
+/*****************************************************************//**
+Copy a column value from the tuple into a caller-supplied buffer.
+DATA_INT columns are decoded via mach_read_int_type() and stored
+through a pointer of the matching signedness and width; FLOAT/DOUBLE
+are decoded when the lengths match (else 0 is returned); all other
+types are memcpy'd up to min(data_len, len).
+@return bytes copied or IB_SQL_NULL */
+UNIV_INLINE
+ib_ulint_t
+ib_col_copy_value_low(
+/*==================*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i,		/*!< in: column index in tuple */
+	void*		dst,		/*!< out: copied data value */
+	ib_ulint_t	len)		/*!< in: max data value len to copy */
+{
+	const void*	data;
+	const dfield_t*	dfield;
+	ulint		data_len;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, i);
+
+	data = dfield_get_data(dfield);
+	data_len = dfield_get_len(dfield);
+
+	if (data_len != UNIV_SQL_NULL) {
+
+		const dtype_t*  dtype = dfield_get_type(dfield);
+
+		switch (dtype_get_mtype(dfield_get_type(dfield))) {
+		case DATA_INT: {
+			ibool		usign;
+			ullint		ret;
+
+			ut_a(data_len == len);
+
+			usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+			ret = mach_read_int_type(static_cast<const byte*>(data),
+						 data_len, usign);
+
+			/* BUG FIX: the unsigned/signed store branches were
+			inverted -- an unsigned column was stored through a
+			signed pointer type and vice versa. Store through the
+			pointer type that matches the column's signedness. */
+			if (usign) {
+				if (len == 2) {
+					*(ib_u16_t*)dst = (ib_u16_t)ret;
+				} else if (len == 4) {
+					*(ib_u32_t*)dst = (ib_u32_t)ret;
+				} else {
+					*(ib_u64_t*)dst = (ib_u64_t)ret;
+				}
+			} else {
+				if (len == 2) {
+					*(ib_i16_t*)dst = (ib_i16_t)ret;
+				} else if (len == 4) {
+					*(ib_i32_t*)dst = (ib_i32_t)ret;
+				} else {
+					*(ib_i64_t*)dst = (ib_i64_t)ret;
+				}
+			}
+
+			break;
+		}
+		case DATA_FLOAT:
+			if (len == data_len) {
+				float	f;
+
+				ut_a(data_len == sizeof(f));
+				f = mach_float_read(static_cast<const byte*>(
+					data));
+				memcpy(dst, &f, sizeof(f));
+			} else {
+				data_len = 0;
+			}
+			break;
+		case DATA_DOUBLE:
+			if (len == data_len) {
+				double	d;
+
+				ut_a(data_len == sizeof(d));
+				d = mach_double_read(static_cast<const byte*>(
+					data));
+				memcpy(dst, &d, sizeof(d));
+			} else {
+				data_len = 0;
+			}
+			break;
+		default:
+			data_len = ut_min(data_len, len);
+			memcpy(dst, data, data_len);
+		}
+	} else {
+		data_len = IB_SQL_NULL;
+	}
+
+	return(data_len);
+}
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return bytes copied or IB_SQL_NULL */
+UNIV_INTERN
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ void* dst, /*!< out: copied data value */
+ ib_ulint_t len) /*!< in: max data value len to copy */
+{
+ return(ib_col_copy_value_low(ib_tpl, i, dst, len));
+}
+
+/*****************************************************************//**
+Get the InnoDB column attribute from the internal column precise type.
+@return precise type in api format */
+UNIV_INLINE
+ib_col_attr_t
+ib_col_get_attr(
+/*============*/
+ ulint prtype) /*!< in: column definition */
+{
+ ib_col_attr_t attr = IB_COL_NONE;
+
+ if (prtype & DATA_UNSIGNED) {
+ attr = static_cast<ib_col_attr_t>(attr | IB_COL_UNSIGNED);
+ }
+
+ if (prtype & DATA_NOT_NULL) {
+ attr = static_cast<ib_col_attr_t>(attr | IB_COL_NOT_NULL);
+ }
+
+ return(attr);
+}
+
+/*****************************************************************//**
+Get a column name from the tuple.
+@return name of the column */
+UNIV_INTERN
+const char*
+ib_col_get_name(
+/*============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ const char* name;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_table_t* table = cursor->prebuilt->table;
+ dict_col_t* col = dict_table_get_nth_col(table, i);
+ ulint col_no = dict_col_get_no(col);
+
+ name = dict_table_get_col_name(table, col_no);
+
+ return(name);
+}
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+UNIV_INTERN
+const char*
+ib_get_idx_field_name(
+/*==================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+ dict_field_t* field;
+
+ if (index) {
+ field = dict_index_get_nth_field(cursor->prebuilt->index, i);
+
+ if (field) {
+ return(field->name);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+UNIV_INLINE
+ib_ulint_t
+ib_col_get_meta_low(
+/*================*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
+{
+ ib_u16_t prtype;
+ const dfield_t* dfield;
+ ulint data_len;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, i);
+
+ data_len = dfield_get_len(dfield);
+
+ /* We assume 1-1 mapping between the ENUM and internal type codes. */
+ ib_col_meta->type = static_cast<ib_col_type_t>(
+ dtype_get_mtype(dfield_get_type(dfield)));
+
+ ib_col_meta->type_len = dtype_get_len(dfield_get_type(dfield));
+
+ prtype = (ib_u16_t) dtype_get_prtype(dfield_get_type(dfield));
+
+ ib_col_meta->attr = ib_col_get_attr(prtype);
+ ib_col_meta->client_type = prtype & DATA_MYSQL_TYPE_MASK;
+
+ return(data_len);
+}
+
/*************************************************************//**
Check that a column is an integer of the expected size and signedness
before reading it from an InnoDB tuple.
@return DB_SUCCESS if the column matches, else an error code */
UNIV_INLINE
ib_err_t
ib_tuple_check_int(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
	ib_ulint_t	i,		/*!< in: column number */
	ib_bool_t	usign,		/*!< in: true if unsigned */
	ulint		size)		/*!< in: size of integer */
{
	ib_col_meta_t	ib_col_meta;

	ib_col_get_meta_low(ib_tpl, i, &ib_col_meta);

	if (ib_col_meta.type != IB_INT) {
		/* Not an integer column at all. */
		return(DB_DATA_MISMATCH);
	} else if (ib_col_meta.type_len == IB_SQL_NULL) {
		return(DB_UNDERFLOW);
	} else if (ib_col_meta.type_len != size) {
		/* Caller's integer width does not match the column width. */
		return(DB_DATA_MISMATCH);
	} else if ((ib_col_meta.attr & IB_COL_UNSIGNED) && !usign) {
		/* NOTE(review): only the unsigned-column / signed-read
		mismatch is rejected; the reverse direction is allowed. */
		return(DB_DATA_MISMATCH);
	}

	return(DB_SUCCESS);
}
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i8_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u8_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i16_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u16_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i32_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u32_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i64_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u64_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return NULL or pointer to buffer */
+UNIV_INTERN
+const void*
+ib_col_get_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ const void* data;
+ const dfield_t* dfield;
+ ulint data_len;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, i);
+
+ data = dfield_get_data(dfield);
+ data_len = dfield_get_len(dfield);
+
+ return(data_len != UNIV_SQL_NULL ? data : NULL);
+}
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+UNIV_INTERN
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
+{
+ return(ib_col_get_meta_low(ib_tpl, i, ib_col_meta));
+}
+
/*****************************************************************//**
"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
@return new tuple, or NULL */
UNIV_INTERN
ib_tpl_t
ib_tuple_clear(
/*============*/
	ib_tpl_t	ib_tpl)		/*!< in,own: tuple (will be freed) */
{
	const dict_index_t*	index;
	ulint			n_cols;
	ib_tuple_t*		tuple	= (ib_tuple_t*) ib_tpl;
	ib_tuple_type_t		type	= tuple->type;
	mem_heap_t*		heap	= tuple->heap;

	/* Capture everything needed to rebuild the tuple BEFORE emptying
	the heap: the tuple struct itself lives on that heap, so after
	mem_heap_empty() it must not be dereferenced again. */
	index = tuple->index;
	n_cols = dtuple_get_n_fields(tuple->ptr);

	mem_heap_empty(heap);

	/* Recreate a tuple of the same kind on the now-empty heap. */
	if (type == TPL_TYPE_ROW) {
		return(ib_row_tuple_new_low(index, n_cols, heap));
	} else {
		return(ib_key_tuple_new_low(index, n_cols, heap));
	}
}
+
/*****************************************************************//**
Create a new cluster key search tuple and copy the contents of the
secondary index key tuple columns that refer to the cluster index record
to the cluster key. It does a deep copy of the column data.
@return DB_SUCCESS or error code */
UNIV_INTERN
ib_err_t
ib_tuple_get_cluster_key(
/*=====================*/
	ib_crsr_t	ib_crsr,	/*!< in: secondary index cursor */
	ib_tpl_t*	ib_dst_tpl,	/*!< out,own: destination tuple */
	const ib_tpl_t	ib_src_tpl)	/*!< in: source tuple */
{
	ulint		i;
	ulint		n_fields;
	ib_err_t	err = DB_SUCCESS;
	ib_tuple_t*	dst_tuple = NULL;
	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
	ib_tuple_t*	src_tuple = (ib_tuple_t*) ib_src_tpl;
	dict_index_t*	clust_index;

	/* The clustered index is always the first index of the table. */
	clust_index = dict_table_get_first_index(cursor->prebuilt->table);

	/* We need to ensure that the src tuple belongs to the same table
	as the open cursor and that it's not a tuple for a cluster index. */
	if (src_tuple->type != TPL_TYPE_KEY) {
		return(DB_ERROR);
	} else if (src_tuple->index->table != cursor->prebuilt->table) {
		return(DB_DATA_MISMATCH);
	} else if (src_tuple->index == clust_index) {
		return(DB_ERROR);
	}

	/* Create the cluster index key search tuple. On success the caller
	owns *ib_dst_tpl and must free it with ib_tuple_delete(). */
	*ib_dst_tpl = ib_clust_search_tuple_create(ib_crsr);

	if (!*ib_dst_tpl) {
		return(DB_OUT_OF_MEMORY);
	}

	dst_tuple = (ib_tuple_t*) *ib_dst_tpl;
	ut_a(dst_tuple->index == clust_index);

	n_fields = dict_index_get_n_unique(dst_tuple->index);

	/* Do a deep copy of the data fields: for each unique field of the
	clustered key, find its position in the secondary index tuple and
	duplicate the data onto the destination tuple's heap. */
	for (i = 0; i < n_fields; i++) {
		ulint		pos;
		dfield_t*	src_field;
		dfield_t*	dst_field;

		pos = dict_index_get_nth_field_pos(
			src_tuple->index, dst_tuple->index, i);

		/* Every clustered key column must be present in the
		secondary index key. */
		ut_a(pos != ULINT_UNDEFINED);

		src_field = dtuple_get_nth_field(src_tuple->ptr, pos);
		dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);

		if (!dfield_is_null(src_field)) {
			UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);

			/* Deep copy: the data must outlive the source
			tuple, so duplicate it onto the dst heap. */
			dst_field->data = mem_heap_dup(
				dst_tuple->heap,
				src_field->data,
				src_field->len);

			dst_field->len = src_field->len;
		} else {
			dfield_set_null(dst_field);
		}
	}

	return(err);
}
+
+/*****************************************************************//**
+Copy the contents of source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+ ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
+ const ib_tpl_t ib_src_tpl) /*!< in: source tuple */
+{
+ ulint i;
+ ulint n_fields;
+ ib_err_t err = DB_SUCCESS;
+ const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl;
+ ib_tuple_t* dst_tuple = (ib_tuple_t*) ib_dst_tpl;
+
+ /* Make sure src and dst are not the same. */
+ ut_a(src_tuple != dst_tuple);
+
+ /* Make sure they are the same type and refer to the same index. */
+ if (src_tuple->type != dst_tuple->type
+ || src_tuple->index != dst_tuple->index) {
+
+ return(DB_DATA_MISMATCH);
+ }
+
+ n_fields = dtuple_get_n_fields(src_tuple->ptr);
+ ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr));
+
+ /* Do a deep copy of the data fields. */
+ for (i = 0; i < n_fields; ++i) {
+ dfield_t* src_field;
+ dfield_t* dst_field;
+
+ src_field = dtuple_get_nth_field(src_tuple->ptr, i);
+ dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);
+
+ if (!dfield_is_null(src_field)) {
+ UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);
+
+ dst_field->data = mem_heap_dup(
+ dst_tuple->heap,
+ src_field->data,
+ src_field->len);
+
+ dst_field->len = src_field->len;
+ } else {
+ dfield_set_null(dst_field);
+ }
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return own: Tuple for current index */
+UNIV_INTERN
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+
+ n_cols = dict_index_get_n_unique_in_tree(index);
+ return(ib_key_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return own: Tuple for current index */
+UNIV_INTERN
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+
+ n_cols = dict_index_get_n_fields(index);
+ return(ib_row_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return own: Tuple for current table */
+UNIV_INTERN
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(cursor->prebuilt->table);
+
+ n_cols = dict_index_get_n_ordering_defined_by_user(index);
+ return(ib_key_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return own: Tuple for current table */
+UNIV_INTERN
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(cursor->prebuilt->table);
+
+ n_cols = dict_table_get_n_cols(cursor->prebuilt->table);
+ return(ib_row_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return number of user columns */
+UNIV_INTERN
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+ const ib_tpl_t ib_tpl) /*!< in: Tuple for current table */
+{
+ const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
+
+ if (tuple->type == TPL_TYPE_ROW) {
+ return(dict_table_get_n_user_cols(tuple->index->table));
+ }
+
+ return(dict_index_get_n_ordering_defined_by_user(tuple->index));
+}
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return number of columns */
+UNIV_INTERN
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+ const ib_tpl_t ib_tpl) /*!< in: Tuple for table/index */
+{
+ const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
+
+ return(dtuple_get_n_fields(tuple->ptr));
+}
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+UNIV_INTERN
+void
+ib_tuple_delete(
+/*============*/
+ ib_tpl_t ib_tpl) /*!< in,own: Tuple instance to delete */
+{
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ if (!ib_tpl) {
+ return;
+ }
+
+ mem_heap_free(tuple->heap);
+}
+
+/*****************************************************************//**
+Get a table id. This function will acquire the dictionary mutex.
+@return DB_SUCCESS if found */
+UNIV_INTERN
+ib_err_t
+ib_table_get_id(
+/*============*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id) /*!< out: table id if found */
+{
+ ib_err_t err;
+
+ dict_mutex_enter_for_mysql();
+
+ err = ib_table_get_id_low(table_name, table_id);
+
+ dict_mutex_exit_for_mysql();
+
+ return(err);
+}
+
+/*****************************************************************//**
+Get an index id.
+@return DB_SUCCESS if found */
+UNIV_INTERN
+ib_err_t
+ib_index_get_id(
+/*============*/
+ const char* table_name, /*!< in: find index for this table */
+ const char* index_name, /*!< in: index to find */
+ ib_id_u64_t* index_id) /*!< out: index id if found */
+{
+ dict_table_t* table;
+ char* normalized_name;
+ ib_err_t err = DB_TABLE_NOT_FOUND;
+
+ *index_id = 0;
+
+ normalized_name = static_cast<char*>(
+ mem_alloc(ut_strlen(table_name) + 1));
+ ib_normalize_table_name(normalized_name, table_name);
+
+ table = ib_lookup_table_by_name(normalized_name);
+
+ mem_free(normalized_name);
+ normalized_name = NULL;
+
+ if (table != NULL) {
+ dict_index_t* index;
+
+ index = dict_table_get_index_on_name(table, index_name);
+
+ if (index != NULL) {
+ /* We only support 32 bit table and index ids. Because
+ we need to pack the table id into the index id. */
+
+ *index_id = (table->id);
+ *index_id <<= 32;
+ *index_id |= (index->id);
+
+ err = DB_SUCCESS;
+ }
+ }
+
+ return(err);
+}
+
+#ifdef __WIN__
+#define SRV_PATH_SEPARATOR '\\'
+#else
+#define SRV_PATH_SEPARATOR '/'
+#endif
+
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return IB_TRUE if positioned */
+UNIV_INTERN
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+ const ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
+{
+ const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ return(ib_btr_cursor_is_positioned(&prebuilt->pcur));
+}
+
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode.
+@return TRUE if exclusive latch */
+UNIV_INTERN
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+ const ib_trx_t ib_trx) /*!< in: transaction */
+{
+ const trx_t* trx = (const trx_t*) ib_trx;
+
+ return(trx->dict_operation_lock_mode == RW_X_LATCH);
+}
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in shared mode.
+@return TRUE if shared latch */
+UNIV_INTERN
+ib_bool_t
+ib_schema_lock_is_shared(
+/*=====================*/
+ const ib_trx_t ib_trx) /*!< in: transaction */
+{
+ const trx_t* trx = (const trx_t*) ib_trx;
+
+ return(trx->dict_operation_lock_mode == RW_S_LATCH);
+}
+
+/*****************************************************************//**
+Set the Lock an InnoDB cursor/table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+ trx_t* trx = prebuilt->trx;
+ dict_table_t* table = prebuilt->table;
+
+ return(ib_trx_lock_table_with_retry(
+ trx, table, (enum lock_mode) ib_lck_mode));
+}
+
/*****************************************************************//**
Set the Lock an InnoDB table using the table id.
@return DB_SUCCESS or error code */
UNIV_INTERN
ib_err_t
ib_table_lock(
/*==========*/
	ib_trx_t	ib_trx,		/*!< in/out: transaction */
	ib_id_u64_t	table_id,	/*!< in: table id */
	ib_lck_mode_t	ib_lck_mode)	/*!< in: InnoDB lock mode */
{
	ib_err_t	err;
	que_thr_t*	thr;
	mem_heap_t*	heap;
	dict_table_t*	table;
	ib_qry_proc_t	q_proc;
	trx_t*		trx = (trx_t*) ib_trx;

	/* The transaction must already be active. */
	ut_a(trx->state != TRX_STATE_NOT_STARTED);

	table = ib_open_table_by_id(table_id, FALSE);

	if (table == NULL) {
		return(DB_TABLE_NOT_FOUND);
	}

	ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));

	/* lock_table() needs a query thread; build a minimal select
	node + query graph on a temporary heap just for this call. */
	heap = mem_heap_create(128);

	q_proc.node.sel = sel_node_create(heap);

	thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap);

	q_proc.grph.sel = static_cast<que_fork_t*>(que_node_get_parent(thr));
	q_proc.grph.sel->state = QUE_FORK_ACTIVE;

	trx->op_info = "setting table lock";

	/* Only intention locks are supported through this entry point. */
	ut_a(ib_lck_mode == IB_LOCK_IS || ib_lck_mode == IB_LOCK_IX);
	err = static_cast<ib_err_t>(
		lock_table(0, table, (enum lock_mode) ib_lck_mode, thr));

	trx->error_state = err;

	/* The whole query graph lives on this heap. */
	mem_heap_free(heap);

	return(err);
}
+
+/*****************************************************************//**
+Unlock an InnoDB table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_unlock(
+/*=============*/
+ ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
+{
+ ib_err_t err = DB_SUCCESS;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ if (prebuilt->trx->mysql_n_tables_locked > 0) {
+ --prebuilt->trx->mysql_n_tables_locked;
+ } else {
+ err = DB_ERROR;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Set the Lock mode of the cursor.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
+{
+ ib_err_t err = DB_SUCCESS;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
+
+ if (ib_lck_mode == IB_LOCK_X) {
+ err = ib_cursor_lock(ib_crsr, IB_LOCK_IX);
+ } else if (ib_lck_mode == IB_LOCK_S) {
+ err = ib_cursor_lock(ib_crsr, IB_LOCK_IS);
+ }
+
+ if (err == DB_SUCCESS) {
+ prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode;
+ ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Set need to access clustered index record. */
+UNIV_INTERN
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+ ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ prebuilt->need_to_access_clustered = TRUE;
+}
+
/*************************************************************//**
Convert and write an INT column value to an InnoDB tuple.
@return DB_SUCCESS or error */
UNIV_INLINE
ib_err_t
ib_tuple_write_int(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
	ulint		col_no,		/*!< in: column number */
	const void*	value,		/*!< in: integer value */
	ulint		value_len)	/*!< in: sizeof value type */
{
	const dfield_t*	dfield;
	ulint		data_len;
	ulint		type_len;
	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;

	ut_a(col_no < ib_tuple_get_n_cols(ib_tpl));

	dfield = ib_col_get_dfield(tuple, col_no);

	data_len = dfield_get_len(dfield);
	type_len = dtype_get_len(dfield_get_type(dfield));

	/* Reject non-integer columns and size mismatches.
	NOTE(review): the size check compares value_len against the
	CURRENT stored data length, not the declared type_len — for a
	field never set, data_len may not equal the column width;
	verify this is the intended contract. */
	if (dtype_get_mtype(dfield_get_type(dfield)) != DATA_INT
	    || value_len != data_len) {

		return(DB_DATA_MISMATCH);
	}

	/* The actual conversion/endian handling happens in
	ib_col_set_value(), using the declared column width. */
	return(ib_col_set_value(ib_tpl, col_no, value, type_len));
}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i8_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i16(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i16_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i32_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i64_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u8_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
/*****************************************************************//**
Write an integer value to a column. Integers are stored in big-endian
format and will need to be converted from the host format.
@return DB_SUCCESS or error */
UNIV_INTERN
ib_err_t
ib_tuple_write_u16(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
	int		col_no,		/*!< in: column number */
	ib_u16_t	val)		/*!< in: value to write */
{
	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u32_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u64_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+UNIV_INTERN
+void
+ib_cursor_stmt_begin(
+/*=================*/
+ ib_crsr_t ib_crsr) /*!< in: cursor */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+
+ cursor->prebuilt->sql_stat_start = TRUE;
+}
+
+/*****************************************************************//**
+Write a double value to a column.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ double val) /*!< in: value to write */
+{
+ const dfield_t* dfield;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, col_no);
+
+ if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ } else {
+ return(DB_DATA_MISMATCH);
+ }
+}
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	double*		dval)		/*!< out: double value */
+{
+	ib_err_t	err;
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Only copy the value out if the column's main type is DOUBLE;
+	on a type mismatch *dval is left untouched. */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
+		ib_col_copy_value_low(ib_tpl, col_no, dval, sizeof(*dval));
+		err = DB_SUCCESS;
+	} else {
+		err = DB_DATA_MISMATCH;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Write a float value to a column.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	float		val)		/*!< in: value to write */
+{
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Refuse the write unless the column's main type really is
+	FLOAT; mirrors the type check in ib_tuple_write_double(). */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
+		return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+	} else {
+		return(DB_DATA_MISMATCH);
+	}
+}
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	float*		fval)		/*!< out: float value */
+{
+	ib_err_t	err;
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Only copy the value out if the column's main type is FLOAT;
+	on a type mismatch *fval is left untouched. */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
+		ib_col_copy_value_low(ib_tpl, col_no, fval, sizeof(*fval));
+		err = DB_SUCCESS;
+	} else {
+		err = DB_DATA_MISMATCH;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+	ib_crsr_t*	ib_crsr,	/*!< in/out: cursor for table
+					to truncate */
+	ib_id_u64_t*	table_id)	/*!< out: new table id */
+{
+	ib_err_t	err;
+	ib_cursor_t*	cursor = *(ib_cursor_t**) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Default the out-parameter so the caller sees 0 on failure. */
+	*table_id = 0;
+
+	/* Truncation requires an exclusive table lock. */
+	err = ib_cursor_lock(*ib_crsr, IB_LOCK_X);
+
+	if (err == DB_SUCCESS) {
+		trx_t*		trx;
+		dict_table_t*	table = prebuilt->table;
+
+		/* We are going to free the cursor and the prebuilt. Store
+		the transaction handle locally. */
+		trx = prebuilt->trx;
+		err = ib_cursor_close(*ib_crsr);
+		ut_a(err == DB_SUCCESS);
+
+		/* The cursor is gone; make sure the caller cannot use
+		the stale handle. */
+		*ib_crsr = NULL;
+
+		/* A temp go around for assertion in trx_start_for_ddl_low
+		we already start the trx */
+		if (trx->state == TRX_STATE_ACTIVE) {
+#ifdef UNIV_DEBUG
+			trx->start_file = 0;
+#endif /* UNIV_DEBUG */
+			trx->dict_operation = TRX_DICT_OP_TABLE;
+		}
+
+		/* This function currently commits the transaction
+		on success. */
+		err = static_cast<ib_err_t>(
+			row_truncate_table_for_mysql(table, trx));
+
+		if (err == DB_SUCCESS) {
+			/* Truncation assigns the table a new id. */
+			*table_id = (table->id);
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_table_truncate(
+/*==============*/
+	const char*	table_name,	/*!< in: table name */
+	ib_id_u64_t*	table_id)	/*!< out: new table id */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	ib_err_t	trunc_err;
+	ib_trx_t	ib_trx = NULL;
+	ib_crsr_t	ib_crsr = NULL;
+
+	ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE);
+
+	/* The dictionary mutex protects the table lookup and the
+	cursor creation against concurrent DDL. */
+	dict_mutex_enter_for_mysql();
+
+	table = dict_table_open_on_name(table_name, TRUE, FALSE,
+					DICT_ERR_IGNORE_NONE);
+
+	if (table != NULL && dict_table_get_first_index(table)) {
+		err = ib_create_cursor_with_index_id(&ib_crsr, table, 0,
+						     (trx_t*) ib_trx);
+	} else {
+		err = DB_TABLE_NOT_FOUND;
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	if (err == DB_SUCCESS) {
+		trunc_err = ib_cursor_truncate(&ib_crsr, table_id);
+		/* NOTE(review): this assertion is a tautology -- "err"
+		was just tested to equal DB_SUCCESS and is not modified
+		by ib_cursor_truncate().  A failure of the truncate
+		itself is carried in "trunc_err" and handled below. */
+		ut_a(err == DB_SUCCESS);
+	} else {
+		trunc_err = err;
+	}
+
+	/* On success ib_cursor_truncate() closed the cursor and set
+	ib_crsr to NULL; only a failed/early-out path leaves it open. */
+	if (ib_crsr != NULL) {
+		err = ib_cursor_close(ib_crsr);
+		ut_a(err == DB_SUCCESS);
+	}
+
+	if (trunc_err == DB_SUCCESS) {
+		/* The truncate commits internally, so the transaction
+		must be back in the NOT_STARTED state here. */
+		ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>(
+			TRX_STATE_NOT_STARTED));
+
+		err = ib_trx_release(ib_trx);
+		ut_a(err == DB_SUCCESS);
+	} else {
+		err = ib_trx_rollback(ib_trx);
+		ut_a(err == DB_SUCCESS);
+	}
+
+	/* Report the truncate outcome, not the cleanup outcome. */
+	return(trunc_err);
+}
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+ib_err_t
+ib_close_thd(
+/*=========*/
+	void*		thd)	/*!< in: handle to the MySQL thread of the user
+				whose resources should be free'd */
+{
+	/* Thin wrapper over the handler-layer helper; always reports
+	success because innobase_close_thd() returns no status. */
+	innobase_close_thd(static_cast<THD*>(thd));
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Return isolation configuration set by "innodb_api_trx_level"
+@return trx isolation level*/
+UNIV_INTERN
+ib_trx_state_t
+ib_cfg_trx_level()
+/*==============*/
+{
+	/* Expose the innodb_api_trx_level setting as the API enum. */
+	return(static_cast<ib_trx_state_t>(ib_trx_level_setting));
+}
+
+/*****************************************************************//**
+Return configure value for background commit interval (in seconds)
+@return background commit interval (in seconds) */
+UNIV_INTERN
+ib_ulint_t
+ib_cfg_bk_commit_interval()
+/*=======================*/
+{
+	/* Expose the background commit interval setting (seconds). */
+	return(static_cast<ib_ulint_t>(ib_bk_commit_interval));
+}
+
+/*****************************************************************//**
+Get generic configure status
+@return configure status*/
+UNIV_INTERN
+int
+ib_cfg_get_cfg()
+/*============*/
+{
+	int	cfg_status;
+
+	/* Pack the API-relevant configuration switches into a
+	bit mask of IB_CFG_* flags. */
+	cfg_status = (ib_binlog_enabled) ? IB_CFG_BINLOG_ENABLED : 0;
+
+	if (ib_mdl_enabled) {
+		cfg_status |= IB_CFG_MDL_ENABLED;
+	}
+
+	if (ib_disable_row_lock) {
+		cfg_status |= IB_CFG_DISABLE_ROWLOCK;
+	}
+
+	return(cfg_status);
+}
diff --git a/storage/innobase/api/api0misc.cc b/storage/innobase/api/api0misc.cc
new file mode 100644
index 00000000000..b2370105938
--- /dev/null
+++ b/storage/innobase/api/api0misc.cc
@@ -0,0 +1,206 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file api/api0misc.cc
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#include <errno.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#include "api0misc.h"
+#include "trx0roll.h"
+#include "srv0srv.h"
+#include "dict0mem.h"
+#include "dict0dict.h"
+#include "pars0pars.h"
+#include "row0sel.h"
+#include "lock0lock.h"
+#include "ha_prototypes.h"
+#include <m_ctype.h>
+#include <mysys_err.h>
+#include <mysql/plugin.h>
+
+/*********************************************************************//**
+Sets a lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	dict_table_t*	table,	/*!< in: table to lock */
+	enum lock_mode	mode)	/*!< in: LOCK_X or LOCK_S */
+{
+	que_thr_t*	thr;
+	dberr_t		err;
+	mem_heap_t*	heap;
+	sel_node_t*	node;
+
+	heap = mem_heap_create(512);
+
+	trx->op_info = "setting table lock";
+
+	/* Build a dummy SELECT query graph: the lock module requires
+	a query thread even though no rows are fetched. */
+	node = sel_node_create(heap);
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+	thr->graph->state = QUE_FORK_ACTIVE;
+
+	/* We use the select query graph as the dummy graph needed
+	in the lock module call */
+
+	thr = que_fork_get_first_thr(static_cast<que_fork_t*>(
+		que_node_get_parent(thr)));
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+	thr->run_node = thr;
+	thr->prev_node = thr->common.parent;
+
+	err = lock_table(0, table, mode, thr);
+
+	trx->error_state = err;
+
+	if (UNIV_LIKELY(err == DB_SUCCESS)) {
+		que_thr_stop_for_mysql_no_error(thr, trx);
+	} else {
+		que_thr_stop_for_mysql(thr);
+
+		if (err != DB_QUE_THR_SUSPENDED) {
+			ibool	was_lock_wait;
+
+			/* If the failure was a lock wait that has since
+			been resolved, retry the lock request. */
+			was_lock_wait = ib_handle_errors(&err, trx, thr, NULL);
+
+			if (was_lock_wait) {
+				goto run_again;
+			}
+		} else {
+			que_thr_t*	run_thr;
+			que_node_t*	parent;
+
+			parent = que_node_get_parent(thr);
+			run_thr = que_fork_start_command(
+				static_cast<que_fork_t*>(parent));
+
+			ut_a(run_thr == thr);
+
+			/* There was a lock wait but the thread was not
+			in a ready to run or running state. */
+			trx->error_state = DB_LOCK_WAIT;
+
+			goto run_again;
+		}
+	}
+
+	/* Free the dummy graph (this also releases "heap", which the
+	graph nodes were allocated from -- TODO confirm). */
+	que_graph_free(thr->graph);
+	trx->op_info = "";
+
+	return(err);
+}
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+	dberr_t*	new_err,/*!< out: possible new error encountered in
+				lock wait, or if no new error, the value
+				of trx->error_state at the entry of this
+				function */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_savept_t*	savept)	/*!< in: savepoint or NULL */
+{
+	dberr_t	err;
+handle_new_error:
+	err = trx->error_state;
+
+	/* Callers must only invoke this when an error is pending. */
+	ut_a(err != DB_SUCCESS);
+
+	trx->error_state = DB_SUCCESS;
+
+	switch (err) {
+	case DB_LOCK_WAIT_TIMEOUT:
+		/* Roll back the whole transaction on a lock wait
+		timeout.  (A stray "fall through" comment after the
+		break was removed; the break makes fall-through
+		impossible.) */
+		trx_rollback_for_mysql(trx);
+		break;
+	case DB_DUPLICATE_KEY:
+	case DB_FOREIGN_DUPLICATE_KEY:
+	case DB_TOO_BIG_RECORD:
+	case DB_ROW_IS_REFERENCED:
+	case DB_NO_REFERENCED_ROW:
+	case DB_CANNOT_ADD_CONSTRAINT:
+	case DB_TOO_MANY_CONCURRENT_TRXS:
+	case DB_OUT_OF_FILE_SPACE:
+		if (savept) {
+			/* Roll back the latest, possibly incomplete
+			insertion or update */
+
+			trx_rollback_to_savepoint(trx, savept);
+		}
+		break;
+	case DB_LOCK_WAIT:
+		/* Suspend this thread until the lock is granted or the
+		wait itself fails; on a new error, process it above. */
+		lock_wait_suspend_thread(thr);
+
+		if (trx->error_state != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			goto handle_new_error;
+		}
+
+		*new_err = err;
+
+		return(TRUE); /* Operation needs to be retried. */
+
+	case DB_DEADLOCK:
+	case DB_LOCK_TABLE_FULL:
+		/* Roll back the whole transaction; this resolution was added
+		to version 3.23.43 */
+
+		trx_rollback_for_mysql(trx);
+		break;
+
+	case DB_MUST_GET_MORE_FILE_SPACE:
+
+		/* Fatal: there is no way to continue without more
+		tablespace; this terminates the whole process. */
+		exit(1);
+
+	case DB_CORRUPTION:
+	case DB_FOREIGN_EXCEED_MAX_CASCADE:
+		break;
+	default:
+		ut_error;
+	}
+
+	/* Report any error raised during the handling above in
+	preference to the original one. */
+	if (trx->error_state != DB_SUCCESS) {
+		*new_err = trx->error_state;
+	} else {
+		*new_err = err;
+	}
+
+	trx->error_state = DB_SUCCESS;
+
+	return(FALSE);
+}
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8b7a19777ab..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
#endif /* UNIV_BTR_DEBUG */
/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
static
buf_block_t*
btr_root_block_get(
/*===============*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ const dict_index_t* index, /*!< in: index tree */
+ ulint mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
ulint zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
zip_size = dict_table_zip_size(index->table);
root_page_no = dict_index_get_page(index);
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- index, mtr);
+ block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
btr_assert_not_corrupted(block, index);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+ mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint height;
+ buf_block_t* root_block;
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK)
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK));
+
+ /* S latches the page */
+ root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+ height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+ /* Release the S latch on the root page. */
+ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ return(height);
+}
+
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+ fseg_header_t* seg_header, /*!< in/out: segment header */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ ulint space, /*!< in: tablespace identifier */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+ if (offset < FIL_PAGE_DATA
+ || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+ return(FALSE);
+
+ } else if (page_zip) {
+ mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+ page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+ 4, mtr);
+ } else {
+ mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+ space, MLOG_4BYTES, mtr);
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+{
+ dberr_t err;
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+ page_zip_des_t* page_zip;
+ dict_table_t* table = index->table;
+ ulint space_id = dict_index_get_space(index);
+ ulint zip_size = dict_table_zip_size(table);
+ ulint root_page_no = dict_index_get_page(index);
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+ return(DB_CORRUPTION););
+
+ block = btr_block_get(
+ space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ /* Check that this is a B-tree page and both the PREV and NEXT
+ pointers are FIL_NULL, because the root page does not have any
+ siblings. */
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ || fil_page_get_prev(page) != FIL_NULL
+ || fil_page_get_next(page) != FIL_NULL) {
+
+ err = DB_CORRUPTION;
+
+ } else if (dict_index_is_clust(index)) {
+ bool page_is_compact_format;
+
+ page_is_compact_format = page_is_comp(page) > 0;
+
+ /* Check if the page format and table format agree. */
+ if (page_is_compact_format != dict_table_is_comp(table)) {
+ err = DB_CORRUPTION;
+ } else {
+
+ /* Check that the table flags and the tablespace
+ flags match. */
+ ulint flags = fil_space_get_flags(table->space);
+
+ if (flags
+ && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ /* Check and adjust the file segment headers, if all OK so far. */
+ if (err == DB_SUCCESS
+ && (!btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + page, page_zip, space_id, &mtr)
+ || !btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + page, page_zip, space_id, &mtr))) {
+
+ err = DB_CORRUPTION;
+ }
+
+ mtr_commit(&mtr);
+
+ return(err);
}
/*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_S_LOCK));
- if (index->page == FIL_NULL
- || index->to_be_dropped
+ if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|| *index->name == TEMP_INDEX_PREFIX) {
return(ULINT_UNDEFINED);
}
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
+ ulint compression_level,/*!< in: compression level to be used
+ if dealing with compressed page */
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
ulint max_ins_size1;
ulint max_ins_size2;
ibool success = FALSE;
+ byte type;
+ byte* log_ptr;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
#ifndef UNIV_HOTBACKUP
/* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
+ if (page_zip) {
+ type = MLOG_ZIP_PAGE_REORGANIZE;
+ } else if (page_is_comp(page)) {
+ type = MLOG_COMP_PAGE_REORGANIZE;
+ } else {
+ type = MLOG_PAGE_REORGANIZE;
+ }
+
+ log_ptr = mlog_open_and_write_index(
+ mtr, page, index, type, page_zip ? 1 : 0);
+
+ /* For compressed pages write the compression level. */
+ if (log_ptr && page_zip) {
+ mach_write_to_1(log_ptr, compression_level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+
#endif /* !UNIV_HOTBACKUP */
/* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
ut_ad(max_trx_id != 0 || recovery);
}
- if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+ if (page_zip
+ && !page_zip_compress(page_zip, page, index,
+ compression_level, NULL)) {
/* Restore the old page and exit. */
btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
- return(btr_page_reorganize_low(FALSE, block, index, mtr));
+ return(btr_page_reorganize_low(FALSE, page_compression_level,
+ block, index, mtr));
}
#endif /* !UNIV_HOTBACKUP */
@@ -1762,18 +1936,32 @@ byte*
btr_parse_page_reorganize(
/*======================*/
byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)),
- /*!< in: buffer end */
+ byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
+ ulint level = page_compression_level;
+
ut_ad(ptr && end_ptr);
- /* The record is empty, except for the record initial part */
+ /* If dealing with a compressed page the record has the
+ compression level used during original compression written in
+ one byte. Otherwise record is empty. */
+ if (compressed) {
+ if (ptr == end_ptr) {
+ return(NULL);
+ }
+
+ level = (ulint)mach_read_from_1(ptr);
+
+ ut_a(level <= 9);
+ ++ptr;
+ }
if (block != NULL) {
- btr_page_reorganize_low(TRUE, block, index, mtr);
+ btr_page_reorganize_low(TRUE, level, block, index, mtr);
}
return(ptr);
@@ -1827,10 +2015,13 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
page_t* new_page;
ulint new_page_no;
rec_t* rec;
- mem_heap_t* heap;
dtuple_t* node_ptr;
ulint level;
rec_t* node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
lock_update_root_raise(new_block, root_block);
/* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
+ if (!*heap) {
+ *heap = mem_heap_create(1000);
+ }
rec = page_rec_get_next(page_get_infimum_rec(new_page));
new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
+ node_ptr = dict_index_build_node_ptr(
+ index, rec, new_page_no, *heap, level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -1961,15 +2153,12 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root_block, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, 0, mtr);
+ index, offsets, heap, 0, mtr);
/* The root page should only contain the node pointer
to new_page at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
- /* Free the memory heap */
- mem_heap_free(heap);
-
/* We play safe and reset the free bits for the new page */
#if 0
@@ -1985,7 +2174,8 @@ btr_root_raise_and_insert(
PAGE_CUR_LE, page_cursor);
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+ return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
}
/*************************************************************//**
@@ -2213,9 +2403,9 @@ func_exit:
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
btr_page_insert_fits(
/*=================*/
btr_cur_t* cursor, /*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
const rec_t* split_rec,/*!< in: suggestion for first record
on upper half-page, or NULL if
tuple to be inserted should be first */
- const ulint* offsets,/*!< in: rec_get_offsets(
- split_rec, cursor->index) */
+ ulint** offsets,/*!< in: rec_get_offsets(
+ split_rec, cursor->index); out: garbage */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mem_heap_t* heap) /*!< in: temporary memory heap */
+ mem_heap_t** heap) /*!< in: temporary memory heap */
{
page_t* page;
ulint insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
ulint total_n_recs;
const rec_t* rec;
const rec_t* end_rec;
- ulint* offs;
page = btr_cur_get_page(cursor);
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
+ ut_ad(!split_rec
+ || !page_is_comp(page) == !rec_offs_comp(*offsets));
+ ut_ad(!split_rec
+ || rec_offs_validate(split_rec, cursor->index, *offsets));
insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@ btr_page_insert_fits(
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+ } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = split_rec;
@@ -2277,19 +2465,17 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
- offs = NULL;
-
while (rec != end_rec) {
/* In this loop we calculate the amount of reserved
space after rec is removed from page. */
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
- total_data -= rec_offs_size(offs);
+ total_data -= rec_offs_size(*offsets);
total_n_recs--;
if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
rec = page_rec_get_next_const(rec);
}
- return(FALSE);
+ return(false);
}
/*******************************************************//**
@@ -2314,6 +2500,7 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
- ulint err;
+ dberr_t err;
rec_t* rec;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = NULL;
ut_ad(level > 0);
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
ut_ad(cursor.flag == BTR_CUR_BINARY);
err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
- &dummy_big_rec, 0, NULL, mtr);
+ flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+ err = btr_cur_pessimistic_insert(flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec,
+ &dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
+ mem_heap_free(heap);
}
/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree. */
-static
+static __attribute__((nonnull))
void
btr_attach_half_pages(
/*==================*/
+ ulint flags, /*!< in: undo logging and
+ locking flags */
dict_index_t* index, /*!< in: the index tree */
buf_block_t* block, /*!< in/out: page to be split */
const rec_t* split_rec, /*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
/* Insert it next to the pointer to the lower half. Note that this
may generate recursion leading to a split on the higher level. */
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+ btr_insert_on_non_leaf_level(flags, index, level + 1,
+ node_ptr_upper, mtr);
/* Free the memory heap */
mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
/*************************************************************//**
Determine if a tuple is smaller than any record on the page.
@return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
btr_page_tuple_smaller(
/*===================*/
btr_cur_t* cursor, /*!< in: b-tree cursor */
const dtuple_t* tuple, /*!< in: tuple to consider */
- ulint* offsets,/*!< in/out: temporary storage */
+ ulint** offsets,/*!< in/out: temporary storage */
ulint n_uniq, /*!< in: number of unique fields
in the index page records */
mem_heap_t** heap) /*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
- offsets = rec_get_offsets(
- first_rec, cursor->index, offsets,
+ *offsets = rec_get_offsets(
+ first_rec, cursor->index, *offsets,
n_uniq, heap);
- return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+ return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
}
/*************************************************************//**
@@ -2525,9 +2724,12 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
- mem_heap_t* heap;
ulint n_uniq;
- ulint* offsets;
- heap = mem_heap_create(1024);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
func_start:
- mem_heap_empty(heap);
- offsets = NULL;
+ mem_heap_empty(*heap);
+ *offsets = NULL;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || (flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(cursor->index));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
if (split_rec == NULL) {
insert_left = btr_page_tuple_smaller(
- cursor, tuple, offsets, n_uniq, &heap);
+ cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
if (page_get_n_recs(page) > 1) {
split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
- offsets, n_uniq, &heap)) {
+ offsets, n_uniq, heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
@@ -2635,10 +2840,10 @@ func_start:
if (split_rec) {
first_rec = move_limit = split_rec;
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
+ *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
+ n_uniq, heap);
- insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+ insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
/* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(cursor->index, block,
+ btr_attach_half_pages(flags, cursor->index, block,
first_rec, new_block, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, NULL,
- NULL, tuple, n_ext, heap);
+ offsets, tuple, n_ext, heap);
}
- if (insert_will_fit && page_is_leaf(page)) {
+ if (insert_will_fit && page_is_leaf(page)
+ && !dict_index_is_online_ddl(cursor->index)) {
mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
@@ -2805,8 +3011,8 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
#ifdef UNIV_ZIP_DEBUG
{
@@ -2837,7 +3043,7 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- mem_heap_free(heap);
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
{
btr_cur_t cursor;
ibool compressed;
- ulint err;
+ dberr_t err;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
- mtr);
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
@@ -3098,6 +3304,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
+ bool lift_father_up;
+ buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3108,11 +3316,13 @@ btr_lift_page_up(
{
btr_cur_t cursor;
- mem_heap_t* heap = mem_heap_create(100);
- ulint* offsets;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = mem_heap_create(
+ sizeof(*offsets)
+ * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
- offsets = btr_page_get_father_block(NULL, heap, index,
+ offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3136,6 +3346,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
+ lift_father_up = (n_blocks && page_level == 0);
+ if (lift_father_up) {
+ /* The father page also should be the only on its level (not
+ root). We should lift up the father page at first.
+ Because the leaf page should be lifted up only for root page.
+ The freeing page is based on page_level (==0 or !=0)
+ to choose segment. If the page_level is changed ==0 from !=0,
+ later freeing of the page doesn't find the page allocation
+ to be freed.*/
+
+ block = father_block;
+ page = buf_block_get_frame(block);
+ page_level = btr_page_get_level(page, mtr);
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ father_block = blocks[0];
+ father_page_zip = buf_block_get_page_zip(father_block);
+ father_page = buf_block_get_frame(father_block);
+ }
+
mem_heap_free(heap);
}
@@ -3143,6 +3376,7 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ page_level++;
/* Copy the records to the father page one by one. */
if (0
@@ -3174,7 +3408,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
- for (i = 0; i < n_blocks; i++, page_level++) {
+ for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
@@ -3196,7 +3430,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
- return(father_block);
+ return(lift_father_up ? block_orig : father_block);
}
/*************************************************************//**
@@ -3267,6 +3501,7 @@ btr_compress(
if (adjust) {
nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ ut_ad(nth_rec > 0);
}
/* Decide the page to which we try to merge and which will inherit
@@ -3323,6 +3558,16 @@ err_exit:
return(FALSE);
}
+ /* If compression padding tells us that merging will result in
+ too packed up page i.e.: which is likely to cause compression
+ failure then don't merge the pages. */
+ if (zip_size && page_is_leaf(merge_page)
+ && (page_get_data_size(merge_page) + data_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto err_exit;
+ }
+
ut_ad(page_validate(merge_page, index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3502,6 +3747,7 @@ func_exit:
mem_heap_free(heap);
if (adjust) {
+ ut_ad(nth_rec > 0);
btr_cur_position(
index,
page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3818,7 +4064,7 @@ btr_print_index(
mtr_start(&mtr);
- root = btr_root_block_get(index, &mtr);
+ root = btr_root_block_get(index, RW_X_LATCH, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (heap) {
@@ -3827,7 +4073,7 @@ btr_print_index(
mtr_commit(&mtr);
- btr_validate_index(index, NULL);
+ btr_validate_index(index, 0);
}
#endif /* UNIV_BTR_PRINT */
@@ -4013,8 +4259,22 @@ btr_index_page_validate(
{
page_cur_t cur;
ibool ret = TRUE;
+#ifndef DBUG_OFF
+ ulint nth = 1;
+#endif /* !DBUG_OFF */
page_cur_set_before_first(block, &cur);
+
+ /* Directory slot 0 should only contain the infimum record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(page_rec_get_nth_const(
+ page_cur_get_page(&cur), 0)
+ == cur.rec);
+ ut_a(page_dir_slot_get_n_owned(
+ page_dir_get_nth_slot(
+ page_cur_get_page(&cur), 0))
+ == 1););
+
page_cur_move_to_next(&cur);
for (;;) {
@@ -4028,6 +4288,16 @@ btr_index_page_validate(
return(FALSE);
}
+ /* Verify that page_rec_get_nth_const() is correctly
+ retrieving each record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(cur.rec == page_rec_get_nth_const(
+ page_cur_get_page(&cur),
+ page_rec_get_n_recs_before(
+ cur.rec)));
+ ut_a(nth++ == page_rec_get_n_recs_before(
+ cur.rec)););
+
page_cur_move_to_next(&cur);
}
@@ -4078,14 +4348,15 @@ btr_validate_report2(
Validates index tree level.
@return TRUE if ok */
static
-ibool
+bool
btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
- trx_t* trx, /*!< in: transaction or NULL */
+ const trx_t* trx, /*!< in: transaction or NULL */
ulint level) /*!< in: level number */
{
ulint space;
+ ulint space_flags;
ulint zip_size;
buf_block_t* block;
page_t* page;
@@ -4099,9 +4370,10 @@ btr_validate_level(
ulint left_page_no;
page_cur_t cursor;
dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
+ bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
+ fseg_header_t* seg;
ulint* offsets = NULL;
ulint* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
@@ -4112,15 +4384,39 @@ btr_validate_level(
mtr_x_lock(dict_index_get_lock(index), &mtr);
- block = btr_root_block_get(index, &mtr);
+ block = btr_root_block_get(index, RW_X_LATCH, &mtr);
page = buf_block_get_frame(block);
+ seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
+ fil_space_get_latch(space, &space_flags);
+
+ if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Flags mismatch: table=%lu, tablespace=%lu",
+ (ulint) index->table->flags, (ulint) space_flags);
+
+ mtr_commit(&mtr);
+
+ return(false);
+ }
+
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
+ if (fseg_page_is_free(seg,
+ block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+ ret = false;
+ }
+
ut_a(space == buf_block_get_space(block));
ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
@@ -4141,12 +4437,13 @@ btr_validate_level(
/* Now we are on the desired level. Loop through the pages on that
level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
+
+ if (level == 0) {
+ /* Leaf pages are managed in their own file segment. */
+ seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
}
+
+loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4156,20 +4453,35 @@ loop:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- /* Check ordering etc. of records */
+ ut_a(block->page.space == space);
+
+ if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+ ret = false;
+
+ } else if (btr_page_get_index_id(page) != index->id) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page index id " IB_ID_FMT " != data dictionary "
+ "index id " IB_ID_FMT,
+ btr_page_get_index_id(page), index->id);
+
+ ret = false;
+
+ } else if (!page_validate(page, index)) {
- if (!page_validate(page, index)) {
btr_validate_report1(index, level, block);
+ ret = false;
+
+ } else if (level == 0 && !btr_index_page_validate(block, index)) {
- ret = FALSE;
- } else if (level == 0) {
/* We are on level 0. Check that the records have the right
number of fields, and field lengths are right. */
- if (!btr_index_page_validate(block, index)) {
-
- ret = FALSE;
- }
+ ret = false;
}
ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4195,7 +4507,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
}
if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4204,7 +4516,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4237,7 +4549,7 @@ loop:
rec_print(stderr, rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
}
}
@@ -4288,7 +4600,7 @@ loop:
fputs("InnoDB: record on page ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4318,7 +4630,7 @@ loop:
fputs("InnoDB: first rec ", stderr);
rec_print(stderr, first_rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4346,7 +4658,7 @@ loop:
if (btr_cur_get_rec(&right_node_cur)
!= right_node_ptr) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer to"
" the right page is wrong\n",
stderr);
@@ -4372,7 +4684,7 @@ loop:
!= page_rec_get_next(
page_get_infimum_rec(
right_father_page))) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 2 to"
" the right page is wrong\n",
stderr);
@@ -4397,7 +4709,7 @@ loop:
if (page_get_page_no(right_father_page)
!= btr_page_get_next(father_page, &mtr)) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 3 to"
" the right page is wrong\n",
stderr);
@@ -4428,17 +4740,23 @@ node_ptr_fails:
on the next loop. The page has already been checked. */
mtr_commit(&mtr);
- if (right_page_no != FIL_NULL) {
+ if (trx_is_interrupted(trx)) {
+ /* On interrupt, return the current status. */
+ } else if (right_page_no != FIL_NULL) {
+
mtr_start(&mtr);
- block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ block = btr_block_get(
+ space, zip_size, right_page_no,
+ RW_X_LATCH, index, &mtr);
+
page = buf_block_get_frame(block);
goto loop;
}
mem_heap_free(heap);
+
return(ret);
}
@@ -4446,40 +4764,39 @@ node_ptr_fails:
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction or NULL */
+ const trx_t* trx) /*!< in: transaction or NULL */
{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
/* Full Text index are implemented by auxiliary tables,
not the B-tree */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+ return(true);
}
+ mtr_t mtr;
+
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
+ bool ok = true;
+ page_t* root = btr_root_get(index, &mtr);
+ ulint n = btr_page_get_level(root, &mtr);
- mtr_commit(&mtr);
+ for (ulint i = 0; i <= n; ++i) {
- return(FALSE);
+ if (!btr_validate_level(index, trx, n - i)) {
+ ok = false;
+ break;
}
}
mtr_commit(&mtr);
- return(TRUE);
+ return(ok);
}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index aeb16200f80..913b2088f24 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -2,6 +2,7 @@
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -57,6 +58,7 @@ Created 10/16/1994 Heikki Tuuri
#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
+#include "row0log.h"
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
@@ -69,13 +71,13 @@ Created 10/16/1994 Heikki Tuuri
#include "zlib.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
-typedef enum btr_op_enum {
+enum btr_op_t {
BTR_NO_OP = 0, /*!< Not buffered */
BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
BTR_DELETE_OP, /*!< Purge a delete-marked record */
BTR_DELMARK_OP /*!< Mark a record for deletion */
-} btr_op_t;
+};
#ifdef UNIV_DEBUG
/** If the following is set to TRUE, this module prints a lot of
@@ -97,6 +99,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint btr_cur_n_sea_old = 0;
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
+#endif /* UNIV_DEBUG */
+
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
@@ -425,6 +432,14 @@ btr_cur_search_to_nth_level(
cursor->low_match = ULINT_UNDEFINED;
#endif
+ ibool s_latch_by_caller;
+
+ s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
+
+ ut_ad(!s_latch_by_caller
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
@@ -460,11 +475,11 @@ btr_cur_search_to_nth_level(
estimate = latch_mode & BTR_ESTIMATE;
/* Turn the flags unrelated to the latch mode off. */
- latch_mode &= ~(BTR_INSERT
- | BTR_DELETE_MARK
- | BTR_DELETE
- | BTR_ESTIMATE
- | BTR_IGNORE_SEC_UNIQUE);
+ latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ ut_ad(!s_latch_by_caller
+ || latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF);
cursor->flag = BTR_CUR_BINARY;
cursor->index = index;
@@ -478,16 +493,16 @@ btr_cur_search_to_nth_level(
#ifdef BTR_CUR_HASH_ADAPT
-#ifdef UNIV_SEARCH_PERF_STAT
+# ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
-#endif
+# endif
if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF
&& info->last_hash_succ
&& !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
+# ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
/* If !has_search_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
@@ -508,7 +523,7 @@ btr_cur_search_to_nth_level(
return;
}
-#endif /* BTR_CUR_HASH_ADAPT */
+# endif /* BTR_CUR_HASH_ADAPT */
#endif /* BTR_CUR_ADAPT */
btr_cur_n_non_sea++;
@@ -525,15 +540,19 @@ btr_cur_search_to_nth_level(
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
-
- } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
+ break;
+ case BTR_CONT_MODIFY_TREE:
/* Do nothing */
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ }
}
page_cursor = btr_cur_get_page_cur(cursor);
@@ -687,6 +706,7 @@ retry_page_get:
? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
}
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -711,13 +731,17 @@ retry_page_get:
cursor, mtr);
}
- if (latch_mode != BTR_MODIFY_TREE
- && latch_mode != BTR_CONT_MODIFY_TREE) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint, dict_index_get_lock(index));
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ /* Release the tree s-latch */
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
page_mode = mode;
@@ -784,8 +808,7 @@ retry_page_get:
will properly check btr_search_enabled again in
btr_search_build_page_hash_index() before building a
page hash index, while holding btr_search_latch. */
- if (UNIV_LIKELY(btr_search_enabled)) {
-
+ if (btr_search_enabled) {
btr_search_info_update(index, cursor);
}
#endif
@@ -815,14 +838,16 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf). */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
ulint page_no;
@@ -839,16 +864,27 @@ btr_cur_open_at_index_side_func(
rec_offs_init(offsets_);
estimate = latch_mode & BTR_ESTIMATE;
- latch_mode = latch_mode & ~BTR_ESTIMATE;
+ latch_mode &= ~BTR_ESTIMATE;
+
+ ut_ad(level != ULINT_UNDEFINED);
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+ break;
+ default:
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -868,6 +904,7 @@ btr_cur_open_at_index_side_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
block->check_index_page_at_flush = TRUE;
@@ -877,26 +914,40 @@ btr_cur_open_at_index_side_func(
height = btr_page_get_level(page, mtr);
root_height = height;
+ ut_a(height >= level);
+ } else {
+ /* TODO: flag the index corrupted if this fails */
+ ut_ad(height == btr_page_get_level(page, mtr));
}
- if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
-
- /* In versions <= 3.23.52 we had forgotten to
- release the tree latch here. If in an index scan
- we had to scan far to find a record visible to the
- current transaction, that could starve others
- waiting for the tree latch. */
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+ if (height == level) {
+ btr_cur_latch_leaves(
+ page, space, zip_size, page_no,
+ latch_mode & ~BTR_ALREADY_S_LATCHED,
+ cursor, mtr);
- /* Release the tree s-latch */
+ if (height == 0) {
+ /* In versions <= 3.23.52 we had
+ forgotten to release the tree latch
+ here. If in an index scan we had to
+ scan far to find a record visible to
+ the current transaction, that could
+ starve others waiting for the tree
+ latch. */
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ break;
+ default:
+ /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
}
@@ -906,7 +957,7 @@ btr_cur_open_at_index_side_func(
page_cur_set_after_last(block, page_cursor);
}
- if (height == 0) {
+ if (height == level) {
if (estimate) {
btr_cur_add_path_info(cursor, height,
root_height);
@@ -965,9 +1016,12 @@ btr_cur_open_at_rnd_pos_func(
ulint* offsets = offsets_;
rec_offs_init(offsets_);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ default:
+ ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -988,6 +1042,7 @@ btr_cur_open_at_rnd_pos_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (height == ULINT_UNDEFINED) {
@@ -1032,7 +1087,7 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
@return pointer to inserted record if succeed, else NULL */
-static
+static __attribute__((nonnull, warn_unused_result))
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
@@ -1040,6 +1095,8 @@ btr_cur_insert_if_possible(
cursor stays valid */
const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
have been stored to tuple */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
{
@@ -1055,8 +1112,8 @@ btr_cur_insert_if_possible(
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
@@ -1066,19 +1123,21 @@ btr_cur_insert_if_possible(
page_cur_search(block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(
+ page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
}
}
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
/*************************************************************//**
For an insert, checks the locks and does the undo logging if desired.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,5,6)))
+dberr_t
btr_cur_ins_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if
@@ -1093,7 +1152,7 @@ btr_cur_ins_lock_and_undo(
successor record */
{
dict_index_t* index;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr;
@@ -1103,6 +1162,10 @@ btr_cur_ins_lock_and_undo(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
+
err = lock_rec_insert_check_and_lock(flags, rec,
btr_cur_get_block(cursor),
index, thr, mtr, inherit);
@@ -1115,7 +1178,7 @@ btr_cur_ins_lock_and_undo(
err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
thr, index, entry,
- NULL, 0, NULL,
+ NULL, 0, NULL, NULL,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -1140,13 +1203,13 @@ static
void
btr_cur_trx_report(
/*===============*/
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
const dict_index_t* index, /*!< in: index */
const char* op) /*!< in: operation */
{
- fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx->id);
+ fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id);
fputs(op, stderr);
- dict_index_name_print(stderr, trx, index);
+ dict_index_name_print(stderr, NULL, index);
putc('\n', stderr);
}
#endif /* UNIV_DEBUG */
@@ -1159,7 +1222,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1167,6 +1230,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1193,13 +1258,16 @@ btr_cur_optimistic_insert(
ibool inherit;
ulint zip_size;
ulint rec_size;
- ulint err;
+ dberr_t err;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
zip_size = buf_block_get_zip_size(block);
#ifdef UNIV_DEBUG_VALGRIND
if (zip_size) {
@@ -1214,7 +1282,7 @@ btr_cur_optimistic_insert(
}
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
dtuple_print(stderr, entry);
}
#endif /* UNIV_DEBUG */
@@ -1276,6 +1344,9 @@ btr_cur_optimistic_insert(
}
}
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
+ goto fail);
+
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */
@@ -1305,6 +1376,15 @@ fail_err:
goto fail;
}
+ /* If compression padding tells us that insertion will result in
+ too packed up page i.e.: which is likely to cause compression
+ failure then don't do an optimistic insertion. */
+ if (zip_size && leaf
+ && (page_get_data_size(page) + rec_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto fail;
+ }
/* Check locks and write to the undo log, if specified */
err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
thr, mtr, &inherit);
@@ -1321,7 +1401,7 @@ fail_err:
{
const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
if (UNIV_UNLIKELY(reorg)) {
@@ -1351,7 +1431,7 @@ fail_err:
page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
if (zip_size != 0) {
@@ -1426,7 +1506,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1437,6 +1517,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1450,8 +1533,7 @@ btr_cur_pessimistic_insert(
dict_index_t* index = cursor->index;
ulint zip_size = dict_table_zip_size(index->table);
big_rec_t* big_rec_vec = NULL;
- mem_heap_t* heap = NULL;
- ulint err;
+ dberr_t err;
ibool dummy_inh;
ibool success;
ulint n_extents = 0;
@@ -1466,6 +1548,9 @@ btr_cur_pessimistic_insert(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
cursor->flag = BTR_CUR_BINARY;
@@ -1523,13 +1608,11 @@ btr_cur_pessimistic_insert(
== buf_block_get_page_no(btr_cur_get_block(cursor))) {
/* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
+ *rec = btr_root_raise_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
} else {
- *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ *rec = btr_page_split_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
}
ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
@@ -1556,29 +1639,36 @@ btr_cur_pessimistic_insert(
/*************************************************************//**
For an update, checks the locks and does the undo logging.
@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,6,7)))
+dberr_t
btr_cur_upd_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on record to update */
+ const ulint* offsets,/*!< in: rec_get_offsets() on cursor */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr, /*!< in/out: mini-transaction */
roll_ptr_t* roll_ptr)/*!< out: roll pointer */
{
dict_index_t* index;
- rec_t* rec;
- ulint err;
+ const rec_t* rec;
+ dberr_t err;
- ut_ad(cursor && update && thr && roll_ptr);
+ ut_ad(thr || (flags & BTR_NO_LOCKING_FLAG));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
if (!dict_index_is_clust(index)) {
+ ut_ad(dict_index_is_online_ddl(index)
+ == !!(flags & BTR_CREATE_FLAG));
+
/* We do undo logging only when we update a clustered index
record */
return(lock_sec_rec_modify_check_and_lock(
@@ -1589,50 +1679,39 @@ btr_cur_upd_lock_and_undo(
/* Check if we have to wait for a lock: enqueue an explicit lock
request if yes */
- err = DB_SUCCESS;
-
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
err = lock_clust_rec_modify_check_and_lock(
flags, btr_cur_get_block(cursor), rec, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap), thr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ offsets, thr);
if (err != DB_SUCCESS) {
-
return(err);
}
}
/* Append the info about the update in the undo log */
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, update,
- cmpl_info, rec, roll_ptr);
- return(err);
+ return(trx_undo_report_row_operation(
+ flags, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, update,
+ cmpl_info, rec, offsets, roll_ptr));
}
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull))
void
btr_cur_update_in_place_log(
/*========================*/
ulint flags, /*!< in: flags */
- rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index where cursor positioned */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index of the record */
const upd_t* update, /*!< in: update vector */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr, /*!< in: roll ptr */
mtr_t* mtr) /*!< in: mtr */
{
- byte* log_ptr;
- page_t* page = page_align(rec);
+ byte* log_ptr;
+ const page_t* page = page_align(rec);
ut_ad(flags < 256);
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -1657,8 +1736,8 @@ btr_cur_update_in_place_log(
mach_write_to_1(log_ptr, flags);
log_ptr++;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -1761,6 +1840,13 @@ btr_cur_update_alloc_zip(
FALSE=update-in-place */
mtr_t* mtr) /*!< in: mini-transaction */
{
+
+ /* Have a local copy of the variables as these can change
+ dynamically. */
+ bool log_compressed = page_log_compressed_pages;
+ ulint compression_level = page_compression_level;
+ page_t* page = buf_block_get_frame(block);
+
ut_a(page_zip == buf_block_get_page_zip(block));
ut_ad(page_zip);
ut_ad(!dict_index_is_ibuf(index));
@@ -1776,12 +1862,27 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
- if (!page_zip_compress(page_zip, buf_block_get_frame(block),
- index, mtr)) {
+ page = buf_block_get_frame(block);
+
+ if (create && page_is_leaf(page)
+ && (length + page_get_data_size(page)
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ return(FALSE);
+ }
+
+ if (!page_zip_compress(
+ page_zip, page, index, compression_level,
+ log_compressed ? mtr : NULL)) {
/* Unable to compress the page */
return(FALSE);
}
+ if (mtr && !log_compressed) {
+ page_zip_compress_write_log_no_data(
+ compression_level, page, index, mtr);
+ }
+
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
space on the page. Because this function will not attempt
@@ -1795,8 +1896,7 @@ btr_cur_update_alloc_zip(
if (!page_zip_available(page_zip, dict_index_is_clust(index),
length, create)) {
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
return(FALSE);
@@ -1810,45 +1910,50 @@ Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
buf_block_t* block;
page_zip_des_t* page_zip;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr = 0;
- trx_t* trx;
ulint was_delete_marked;
ibool is_hashed;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
- trx = thr_get_trx(thr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(trx, index, "update ");
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -1864,19 +1969,17 @@ btr_cur_update_in_place(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, NULL,
- index, offsets, trx, roll_ptr);
+ row_upd_rec_sys_fields(rec, NULL, index, offsets,
+ thr_get_trx(thr), roll_ptr);
}
was_delete_marked = rec_get_deleted_flag(
@@ -1917,7 +2020,7 @@ btr_cur_update_in_place(
}
btr_cur_update_in_place_log(flags, rec, index, update,
- trx, roll_ptr, mtr);
+ trx_id, roll_ptr, mtr);
if (was_delete_marked
&& !rec_get_deleted_flag(
@@ -1929,9 +2032,6 @@ btr_cur_update_in_place(
rec, index, offsets, mtr);
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(DB_SUCCESS);
}
@@ -1945,24 +2045,28 @@ fields of the record do not change.
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
- ulint err;
+ dberr_t err;
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
@@ -1972,10 +2076,8 @@ btr_cur_optimistic_update(
ulint old_rec_size;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
- mem_heap_t* heap;
ulint i;
ulint n_ext;
- ulint* offsets;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
@@ -1985,39 +2087,46 @@ btr_cur_optimistic_update(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
-
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(page) == index->id);
+
+ *offsets = rec_get_offsets(rec, index, *offsets,
+ ULINT_UNDEFINED, heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets)
+ ut_a(!rec_offs_any_null_extern(rec, *offsets)
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "update ");
- rec_print_new(stderr, rec, offsets);
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
+ rec_print_new(stderr, rec, *offsets);
}
#endif /* UNIV_DEBUG */
- if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
+ if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
externally stored in rec or update, and there is enough space
on the compressed page to log the update. */
- mem_heap_free(heap);
- return(btr_cur_update_in_place(flags, cursor, update,
- cmpl_info, thr, mtr));
+ return(btr_cur_update_in_place(
+ flags, cursor, *offsets, update,
+ cmpl_info, thr, trx_id, mtr));
}
- if (rec_offs_any_extern(offsets)) {
+ if (rec_offs_any_extern(*offsets)) {
any_extern:
/* Externally stored fields are treated in pessimistic
update */
- mem_heap_free(heap);
return(DB_OVERFLOW);
}
@@ -2030,8 +2139,14 @@ any_extern:
page_cursor = btr_cur_get_page_cur(cursor);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
+ if (!*heap) {
+ *heap = mem_heap_create(
+ rec_offs_size(*offsets)
+ + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets)));
+ }
+
+ new_entry = row_rec_to_index_entry(rec, index, *offsets,
+ &n_ext, *heap);
/* We checked above that there are no externally stored fields. */
ut_a(!n_ext);
@@ -2039,8 +2154,8 @@ any_extern:
corresponding to new_entry is latched in mtr.
Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
- old_rec_size = rec_offs_size(offsets);
+ FALSE, *heap);
+ old_rec_size = rec_offs_size(*offsets);
new_rec_size = rec_get_converted_size(index, new_entry, 0);
page_zip = buf_block_get_page_zip(block);
@@ -2051,16 +2166,14 @@ any_extern:
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
new_rec_size, TRUE, mtr)) {
- err = DB_ZIP_OVERFLOW;
- goto err_exit;
+ return(DB_ZIP_OVERFLOW);
}
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
if (UNIV_UNLIKELY(page_get_data_size(page)
@@ -2069,8 +2182,7 @@ any_extern:
/* The page would become too empty */
- err = DB_UNDERFLOW;
- goto err_exit;
+ return(DB_UNDERFLOW);
}
/* We do not attempt to reorganize if the page is compressed.
@@ -2088,16 +2200,16 @@ any_extern:
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
- goto err_exit;
+ return(err);
}
/* Ok, we may do the replacement. Store on the page infimum the
@@ -2108,13 +2220,7 @@ any_extern:
btr_search_update_hash_on_delete(cursor);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
@@ -2122,11 +2228,12 @@ any_extern:
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- thr_get_trx(thr)->id);
+ trx_id);
}
/* There are no externally stored columns in new_entry */
- rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
+ rec = btr_cur_insert_if_possible(
+ cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
if (page_zip && !dict_index_is_clust(index)
@@ -2141,10 +2248,7 @@ any_extern:
page_cur_move_to_next(page_cursor);
- err = DB_SUCCESS;
-err_exit:
- mem_heap_free(heap);
- return(err);
+ return(DB_SUCCESS);
}
/*************************************************************//**
@@ -2203,7 +2307,7 @@ own x-latches to brothers of page, if those brothers exist. We assume
here that the ordering fields of the record do not change.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -2211,7 +2315,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -2219,7 +2329,9 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
@@ -2231,17 +2343,15 @@ btr_cur_pessimistic_update(
page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
- dtuple_t* new_entry;
- ulint err;
- ulint optim_err;
+ dberr_t err;
+ dberr_t optim_err;
roll_ptr_t roll_ptr;
- trx_t* trx;
ibool was_first;
ulint n_extents = 0;
ulint n_reserved;
ulint n_ext;
- ulint* offsets = NULL;
+ *offsets = NULL;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
@@ -2258,9 +2368,16 @@ btr_cur_pessimistic_update(
#endif /* UNIV_ZIP_DEBUG */
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- optim_err = btr_cur_optimistic_update(flags, cursor, update,
- cmpl_info, thr, mtr);
+ optim_err = btr_cur_optimistic_update(
+ flags, cursor, offsets, offsets_heap, update,
+ cmpl_info, thr, trx_id, mtr);
switch (optim_err) {
case DB_UNDERFLOW:
@@ -2272,7 +2389,8 @@ btr_cur_pessimistic_update(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
@@ -2300,20 +2418,11 @@ btr_cur_pessimistic_update(
}
}
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
-
- trx = thr_get_trx(thr);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, *heap);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(rec, index, offsets);
+ dtuple_t* new_entry = row_rec_to_index_entry(
+ rec, index, *offsets, &n_ext, entry_heap);
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr. If the
@@ -2322,15 +2431,15 @@ btr_cur_pessimistic_update(
purge would also have removed the clustered index record
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
+ FALSE, entry_heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
+ trx_id);
}
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
+ if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
/* We are in a transaction rollback undoing a row
update: we must free possible externally stored fields
which got new values in the update, if they are not
@@ -2341,16 +2450,17 @@ btr_cur_pessimistic_update(
ut_ad(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(
- index, rec, page_zip, offsets, update,
- trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
+ index, rec, page_zip, *offsets, update,
+ trx_is_recv(thr_get_trx(thr))
+ ? RB_RECOVERY : RB_NORMAL, mtr);
}
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
+ ut_ad(rec_offs_validate(rec, index, *offsets));
+ n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
if (page_zip) {
ut_ad(page_is_comp(page));
@@ -2396,11 +2506,12 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
- rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
+ rec = btr_cur_insert_if_possible(cursor, new_entry,
+ offsets, offsets_heap, n_ext, mtr);
if (rec) {
page_cursor->rec = rec;
@@ -2408,20 +2519,19 @@ make_external:
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
rec, block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ btr_cur_unmark_extern_fields(
+ page_zip, rec, index, *offsets, mtr);
}
- btr_cur_compress_if_useful(
- cursor,
- big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
- mtr);
+ bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
+
+ if (btr_cur_compress_if_useful(cursor, adjust, mtr)
+ && adjust) {
+ rec_offs_make_valid(page_cursor->rec, index, *offsets);
+ }
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -2440,8 +2550,7 @@ make_external:
ut_a(page_zip || optim_err != DB_UNDERFLOW);
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(page)) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
}
@@ -2473,11 +2582,13 @@ make_external:
err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
| BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec,
+ cursor, offsets, offsets_heap,
+ new_entry, &rec,
&dummy_big_rec, n_ext, NULL, mtr);
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
page_cursor->rec = rec;
if (dict_index_is_sec_or_ibuf(index)) {
@@ -2490,10 +2601,10 @@ make_external:
page_update_max_trx_id(rec_block,
buf_block_get_page_zip(rec_block),
- trx->id, mtr);
+ trx_id, mtr);
}
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
buf_block_t* rec_block = btr_cur_get_block(cursor);
@@ -2504,10 +2615,8 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_zip = buf_block_get_page_zip(rec_block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ rec, index, *offsets, mtr);
}
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
@@ -2546,17 +2655,13 @@ UNIV_INLINE
void
btr_cur_del_mark_set_clust_rec_log(
/*===============================*/
- ulint flags, /*!< in: flags */
rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index of the record */
- ibool val, /*!< in: value to set */
- trx_t* trx, /*!< in: deleting transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */
mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
- ut_ad(flags < 256);
- ut_ad(val <= 1);
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
@@ -2572,13 +2677,11 @@ btr_cur_del_mark_set_clust_rec_log(
return;
}
- mach_write_to_1(log_ptr, flags);
- log_ptr++;
- mach_write_to_1(log_ptr, val);
- log_ptr++;
+ *log_ptr++ = 0;
+ *log_ptr++ = 1;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -2675,20 +2778,18 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
roll_ptr_t roll_ptr;
- ulint err;
+ dberr_t err;
page_zip_des_t* page_zip;
trx_t* trx;
@@ -2700,7 +2801,7 @@ btr_cur_del_mark_set_clust_rec(
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -2708,7 +2809,7 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(dict_index_is_clust(index));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
- err = lock_clust_rec_modify_check_and_lock(flags, block,
+ err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
@@ -2716,8 +2817,8 @@ btr_cur_del_mark_set_clust_rec(
return(err);
}
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, NULL, 0, rec,
+ err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, NULL, 0, rec, offsets,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -2730,17 +2831,21 @@ btr_cur_del_mark_set_clust_rec(
page_zip = buf_block_get_page_zip(block);
- btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
- btr_rec_set_deleted_flag(rec, page_zip, val);
+ btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
+ btr_rec_set_deleted_flag(rec, page_zip, TRUE);
trx = thr_get_trx(thr);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, page_zip,
- index, offsets, trx, roll_ptr);
+ if (dict_index_is_online_ddl(index)) {
+ row_log_table_delete(
+ rec, index, offsets,
+ trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ + rec));
}
- btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+ row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr);
+
+ btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id,
roll_ptr, mtr);
return(err);
@@ -2829,7 +2934,7 @@ btr_cur_parse_del_mark_set_sec_rec(
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
@@ -2840,14 +2945,14 @@ btr_cur_del_mark_set_sec_rec(
{
buf_block_t* block;
rec_t* rec;
- ulint err;
+ dberr_t err;
block = btr_cur_get_block(cursor);
rec = btr_cur_get_rec(cursor);
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), cursor->index,
+ btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
"del mark ");
rec_print(stderr, rec, cursor->index);
}
@@ -2937,12 +3042,15 @@ positioned, but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to
delete; cursor stays valid: if deletion
succeeds, on function exit it points to the
successor of the deleted record */
+#ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+#endif /* UNIV_DEBUG */
mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
@@ -2956,6 +3064,7 @@ btr_cur_optimistic_delete(
ibool no_compress_needed;
rec_offs_init(offsets_);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page deletions */
@@ -2963,6 +3072,9 @@ btr_cur_optimistic_delete(
block = btr_cur_get_block(cursor);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || dict_index_is_clust(cursor->index)
+ || (flags & BTR_CREATE_FLAG));
rec = btr_cur_get_rec(cursor);
offsets = rec_get_offsets(rec, cursor->index, offsets,
@@ -3030,7 +3142,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -3043,6 +3155,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
mtr_t* mtr) /*!< in: mtr */
{
@@ -3051,7 +3164,6 @@ btr_cur_pessimistic_delete(
page_zip_des_t* page_zip;
dict_index_t* index;
rec_t* rec;
- dtuple_t* node_ptr;
ulint n_extents = 0;
ulint n_reserved;
ibool success;
@@ -3064,6 +3176,10 @@ btr_cur_pessimistic_delete(
page = buf_block_get_frame(block);
index = btr_cur_get_index(cursor);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
@@ -3112,13 +3228,15 @@ btr_cur_pessimistic_delete(
btr_discard_page(cursor, mtr);
- *err = DB_SUCCESS;
ret = TRUE;
goto return_after_reservations;
}
- lock_update_delete(block, rec);
+ if (flags == 0) {
+ lock_update_delete(block, rec);
+ }
+
level = btr_page_get_level(page, mtr);
if (level > 0
@@ -3147,12 +3265,12 @@ btr_cur_pessimistic_delete(
btr_node_ptr_delete(index, block, mtr);
- node_ptr = dict_index_build_node_ptr(
+ dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, buf_block_get_page_no(block),
heap, level);
- btr_insert_on_non_leaf_level(index,
- level + 1, node_ptr, mtr);
+ btr_insert_on_non_leaf_level(
+ flags, index, level + 1, node_ptr, mtr);
}
}
@@ -3165,9 +3283,9 @@ btr_cur_pessimistic_delete(
ut_ad(btr_check_node_ptr(index, block, mtr));
+return_after_reservations:
*err = DB_SUCCESS;
-return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
@@ -3194,8 +3312,8 @@ btr_cur_add_path_info(
ulint root_height) /*!< in: root node height in tree */
{
btr_path_t* slot;
- rec_t* rec;
- page_t* page;
+ const rec_t* rec;
+ const page_t* page;
ut_a(cursor->path_arr);
@@ -3407,6 +3525,9 @@ btr_estimate_n_rows_in_range(
ibool is_n_rows_exact;
ulint i;
mtr_t mtr;
+ ib_int64_t table_n_rows;
+
+ table_n_rows = dict_table_get_n_rows(index->table);
mtr_start(&mtr);
@@ -3419,9 +3540,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(TRUE, index,
+ btr_cur_open_at_index_side(true, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3437,9 +3558,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(FALSE, index,
+ btr_cur_open_at_index_side(false, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3471,20 +3592,21 @@ btr_estimate_n_rows_in_range(
n_rows = n_rows * 2;
}
+ DBUG_EXECUTE_IF("bug14007649", return(n_rows););
+
/* Do not estimate the number of rows in the range
to over 1 / 2 of the estimated rows in the whole
table */
- if (n_rows > index->table->stat_n_rows / 2
- && !is_n_rows_exact) {
+ if (n_rows > table_n_rows / 2 && !is_n_rows_exact) {
- n_rows = index->table->stat_n_rows / 2;
+ n_rows = table_n_rows / 2;
/* If there are just 0 or 1 rows in the table,
then we estimate all rows are in the range */
if (n_rows == 0) {
- n_rows = index->table->stat_n_rows;
+ n_rows = table_n_rows;
}
}
@@ -3544,9 +3666,9 @@ btr_estimate_n_rows_in_range(
/*******************************************************************//**
Record the number of non_null key values in a given index for
-each n-column prefix of the index where n < dict_index_get_n_unique(index).
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
-index->stat_n_non_null_key_vals. */
+index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */
static
void
btr_record_not_null_field_in_rec(
@@ -3557,7 +3679,7 @@ btr_record_not_null_field_in_rec(
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
- ib_int64_t* n_not_null) /*!< in/out: array to record number of
+ ib_uint64_t* n_not_null) /*!< in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
@@ -3579,11 +3701,12 @@ btr_record_not_null_field_in_rec(
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] and
-the number of pages that were sampled is saved in index->stat_n_sample_sizes[].
-If innodb_stats_method is "nulls_ignored", we also record the number of
-non-null values for each prefix and store the estimates in
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
@@ -3597,8 +3720,8 @@ btr_estimate_number_of_different_key_vals(
ulint n_cols;
ulint matched_fields;
ulint matched_bytes;
- ib_int64_t* n_diff;
- ib_int64_t* n_not_null;
+ ib_uint64_t* n_diff;
+ ib_uint64_t* n_not_null;
ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
@@ -3614,13 +3737,13 @@ btr_estimate_number_of_different_key_vals(
n_cols = dict_index_get_n_unique(index);
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
- * (n_cols + 1)
+ * n_cols
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
- n_diff = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof(ib_int64_t));
+ n_diff = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof(ib_int64_t));
n_not_null = NULL;
@@ -3629,8 +3752,8 @@ btr_estimate_number_of_different_key_vals(
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
- n_not_null = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof *n_not_null);
+ n_not_null = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
@@ -3681,7 +3804,7 @@ btr_estimate_number_of_different_key_vals(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_rec, n_not_null);
}
@@ -3709,14 +3832,14 @@ btr_estimate_number_of_different_key_vals(
&matched_fields,
&matched_bytes);
- for (j = matched_fields + 1; j <= n_cols; j++) {
+ for (j = matched_fields; j < n_cols; j++) {
/* We add one if this index record has
a different prefix from the previous */
n_diff[j]++;
}
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_next_rec, n_not_null);
}
@@ -3751,7 +3874,7 @@ btr_estimate_number_of_different_key_vals(
if (btr_page_get_prev(page, &mtr) != FIL_NULL
|| btr_page_get_next(page, &mtr) != FIL_NULL) {
- n_diff[n_cols]++;
+ n_diff[n_cols - 1]++;
}
}
@@ -3766,7 +3889,7 @@ btr_estimate_number_of_different_key_vals(
also the pages used for external storage of fields (those pages are
included in index->stat_n_leaf_pages) */
- for (j = 0; j <= n_cols; j++) {
+ for (j = 0; j < n_cols; j++) {
index->stat_n_diff_key_vals[j]
= BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, n_sample_pages,
@@ -3796,7 +3919,7 @@ btr_estimate_number_of_different_key_vals(
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
- if (n_not_null != NULL && (j < n_cols)) {
+ if (n_not_null != NULL) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index, n_sample_pages,
@@ -4146,7 +4269,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -4180,7 +4303,7 @@ btr_store_big_rec_extern_fields(
z_stream c_stream;
buf_block_t** freed_pages = NULL;
ulint n_freed_pages = 0;
- enum db_err error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
@@ -4211,7 +4334,7 @@ btr_store_big_rec_extern_fields(
heap = mem_heap_create(250000);
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ err = deflateInit2(&c_stream, page_compression_level,
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
}
@@ -5083,6 +5206,7 @@ btr_copy_zblob_prefix(
" page %lu space %lu\n",
(ulong) fil_page_get_type(bpage->zip.data),
(ulong) page_no, (ulong) space_id);
+ ut_ad(0);
goto end_of_blob;
}
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 5a67afc7e69..aceb6bd1d41 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,6 +133,8 @@ btr_pcur_store_position(
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(page_is_leaf(page));
+ ut_ad(page_get_page_no(page) == index->page);
cursor->old_stored = BTR_PCUR_OLD_STORED;
@@ -258,7 +260,8 @@ btr_pcur_restore_position_func(
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
+ index, latch_mode,
+ btr_pcur_get_btr_cur(cursor), 0, mtr);
cursor->latch_mode = latch_mode;
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
@@ -326,13 +329,19 @@ btr_pcur_restore_position_func(
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
- if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
mode = PAGE_CUR_LE;
- } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
+ break;
+ case BTR_PCUR_AFTER:
mode = PAGE_CUR_G;
- } else {
- ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
+ break;
+ case BTR_PCUR_BEFORE:
mode = PAGE_CUR_L;
+ break;
+ default:
+ ut_error;
+ mode = 0;
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
@@ -341,25 +350,39 @@ btr_pcur_restore_position_func(
/* Restore the old search mode */
cursor->search_mode = old_mode;
- if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor)
- && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(
- btr_pcur_get_rec(cursor), index,
- NULL, ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for the modify clock, since
- the cursor can now be on a different page! But we can retain
- the value of old_rec */
-
- cursor->block_when_stored = btr_pcur_get_block(cursor);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
+ if (btr_pcur_is_on_user_rec(cursor)
+ && !cmp_dtuple_rec(
+ tuple, btr_pcur_get_rec(cursor),
+ rec_get_offsets(btr_pcur_get_rec(cursor),
+ index, NULL,
+ ULINT_UNDEFINED, &heap))) {
+
+ /* We have to store the NEW value for
+ the modify clock, since the cursor can
+ now be on a different page! But we can
+ retain the value of old_rec */
+
+ cursor->block_when_stored =
+ btr_pcur_get_block(cursor);
+ cursor->modify_clock =
+ buf_block_get_modify_clock(
+ cursor->block_when_stored);
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+ mem_heap_free(heap);
+
+ return(TRUE);
+ }
+#ifdef UNIV_DEBUG
+ /* fall through */
+ case BTR_PCUR_BEFORE:
+ case BTR_PCUR_AFTER:
+ break;
+ default:
+ ut_error;
+#endif /* UNIV_DEBUG */
}
mem_heap_free(heap);
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index 7e6e2ef1cb1..432fef05dd5 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -42,7 +42,6 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0pcur.h"
#include "btr0btr.h"
#include "ha0ha.h"
-#include "srv0mon.h"
/** Flag: has the search system been enabled?
Protected by btr_search_latch. */
@@ -1077,6 +1076,7 @@ btr_search_drop_page_hash_index(
mem_heap_t* heap;
const dict_index_t* index;
ulint* offsets;
+ btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
@@ -1102,6 +1102,27 @@ retry:
}
ut_a(!dict_index_is_ibuf(index));
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created (bulk loaded). */
+ case ONLINE_INDEX_COMPLETE:
+ /* The index has been published. */
+ case ONLINE_INDEX_ABORTED:
+ /* Either the index creation was aborted due to an
+ error observed by InnoDB (in which case there should
+ not be any adaptive hash index entries), or it was
+ completed and then flagged aborted in
+ rollback_inplace_alter_table(). */
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index should have been dropped from the tablespace
+ already, and the adaptive hash index entries should have
+ been dropped as well. */
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
table = btr_search_sys->hash_index;
#ifdef UNIV_SYNC_DEBUG
@@ -1196,8 +1217,9 @@ next_rec:
ha_remove_all_nodes_to_page(table, folds[i], page);
}
- ut_a(index->search_info->ref_count > 0);
- index->search_info->ref_count--;
+ info = btr_search_get_info(block->index);
+ ut_a(info->ref_count > 0);
+ info->ref_count--;
block->index = NULL;
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index b6774aede8e..e34216dbc8f 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -335,7 +335,7 @@ buf_buddy_relocate(
{
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
- mutex_t* mutex;
+ ib_mutex_t* mutex;
ulint space;
ulint page_no;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 96821478e60..6efa14e6791 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -53,7 +53,6 @@ Created 11/5/1995 Heikki Tuuri
#include "page0zip.h"
#include "srv0mon.h"
#include "buf0checksum.h"
-#include "buf0dblwr.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -372,10 +371,6 @@ buf_get_total_list_len(
buf_pool = buf_pool_from_array(i);
- if (!buf_pool) {
- continue;
- }
-
*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
*free_len += UT_LIST_GET_LEN(buf_pool->free);
*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
@@ -383,6 +378,32 @@ buf_get_total_list_len(
}
/********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size) /*!< out: list sizes
+ in all buffer pools */
+{
+ ut_ad(buf_pools_list_size);
+ memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ /* We don't need mutex protection since this is
+ for statistics purpose */
+ buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
+ buf_pools_list_size->unzip_LRU_bytes +=
+ UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
+ buf_pools_list_size->flush_list_bytes +=
+ buf_pool->stat.flush_list_bytes;
+ }
+}
+
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -400,10 +421,6 @@ buf_get_total_stat(
buf_pool = buf_pool_from_array(i);
- if (!buf_pool) {
- continue;
- }
-
buf_stat = &buf_pool->stat;
tot_stat->n_page_gets += buf_stat->n_page_gets;
tot_stat->n_pages_read += buf_stat->n_pages_read;
@@ -456,6 +473,8 @@ UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
+ bool check_lsn, /*!< in: true if we need to check
+ and complain about the LSN */
const byte* read_buf, /*!< in: a database page */
ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
@@ -480,14 +499,17 @@ buf_page_is_corrupted(
if (recv_lsn_checks_on) {
lsn_t current_lsn;
- if (log_peek_lsn(&current_lsn)
- && UNIV_UNLIKELY
- (current_lsn
- < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
+ /* Since we are going to reset the page LSN during the import
+ phase it makes no sense to spam the log with error messages. */
+
+ if (check_lsn
+ && log_peek_lsn(&current_lsn)
+ && current_lsn
+ < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error: page %lu log sequence number"
+ " InnoDB: Error: page %lu log sequence number"
" " LSN_PF "\n"
"InnoDB: is in the future! Current system "
"log sequence number " LSN_PF ".\n"
@@ -673,6 +695,8 @@ buf_page_is_corrupted(
is added and not handled here */
}
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
+
return(FALSE);
}
@@ -885,7 +909,7 @@ pfs_register_buffer_block(
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
for (i = 0; i < num_to_register; i++) {
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* rwlock;
# ifdef UNIV_PFS_MUTEX
@@ -1267,7 +1291,7 @@ buf_pool_init_instance(
SYNC_BUF_FLUSH_LIST);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
- buf_pool->no_flush[i] = os_event_create(NULL);
+ buf_pool->no_flush[i] = os_event_create();
}
buf_pool->watch = (buf_page_t*) mem_zalloc(
@@ -1334,7 +1358,7 @@ buf_pool_free_instance(
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
-ulint
+dberr_t
buf_pool_init(
/*==========*/
ulint total_size, /*!< in: size of the total pool in bytes */
@@ -1731,7 +1755,7 @@ buf_pool_watch_unset(
ut_a(bpage);
if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
- mutex_t* mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
ut_a(bpage->buf_fix_count > 0);
@@ -1802,34 +1826,24 @@ buf_page_make_young(
}
/********************************************************************//**
-Sets the time of the first access of a page and moves a page to the
-start of the buffer pool LRU list if it is too old. This high-level
-function can be used to prevent an important page from slipping
-out of the buffer pool. */
+Moves a page to the start of the buffer pool LRU list if it is too old.
+This high-level function can be used to prevent an important page from
+slipping out of the buffer pool. */
static
void
-buf_page_set_accessed_make_young(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: buffer block of a
+buf_page_make_young_if_needed(
+/*==========================*/
+ buf_page_t* bpage) /*!< in/out: buffer block of a
file page */
- unsigned access_time) /*!< in: bpage->access_time
- read under mutex protection,
- or 0 if unknown */
{
+#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(bpage);
- buf_pool_mutex_exit(buf_pool);
- } else if (!access_time) {
- ulint time_ms = ut_time_ms();
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(bpage, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ buf_page_make_young(bpage);
}
}
@@ -1880,7 +1894,7 @@ buf_page_set_file_page_was_freed(
&hash_lock);
if (bpage) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -1913,7 +1927,7 @@ buf_page_reset_file_page_was_freed(
bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
&hash_lock);
if (bpage) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -1974,11 +1988,10 @@ buf_page_get_zip(
ulint offset) /*!< in: page number */
{
buf_page_t* bpage;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
rw_lock_t* hash_lock;
ibool discard_attempted = FALSE;
ibool must_read;
- unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool->stat.n_page_gets++;
@@ -2051,15 +2064,17 @@ err_exit:
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- access_time = buf_page_is_accessed(bpage);
rw_lock_s_unlock(hash_lock);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!bpage->file_page_was_freed);
#endif
+
+ buf_page_set_accessed(bpage);
+
mutex_exit(block_mutex);
- buf_page_set_accessed_make_young(bpage, access_time);
+ buf_page_make_young_if_needed(bpage);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -2372,6 +2387,28 @@ buf_block_is_uncompressed(
return(buf_pointer_is_block_field_instance(buf_pool, (void*) block));
}
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/********************************************************************//**
+Return true if probe is enabled.
+@return true if probe enabled. */
+static
+bool
+buf_debug_execute_is_force_flush()
+/*==============================*/
+{
+ DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); );
+
+ /* This is used during queisce testing, we want to ensure maximum
+ buffering by the change buffer. */
+
+ if (srv_ibuf_disable_background_merge) {
+ return(true);
+ }
+
+ return(false);
+}
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
@@ -2398,7 +2435,7 @@ buf_page_get_gen(
ulint fix_type;
ibool must_read;
rw_lock_t* hash_lock;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
buf_page_t* hash_bpage;
ulint retries = 0;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
@@ -2666,27 +2703,38 @@ wait_until_unfixed:
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
- rw_lock_x_lock_func(&block->lock, 0, file, line);
+ rw_lock_x_lock_inline(&block->lock, 0, file, line);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
rw_lock_x_unlock(hash_lock);
- mutex_exit(&block->mutex);
- mutex_exit(&buf_pool->zip_mutex);
- buf_pool->n_pend_unzip++;
+ buf_pool->n_pend_unzip++;
buf_pool_mutex_exit(buf_pool);
+ access_time = buf_page_is_accessed(&block->page);
+ mutex_exit(&block->mutex);
+ mutex_exit(&buf_pool->zip_mutex);
+
buf_page_free_descriptor(bpage);
- /* Decompress the page and apply buffered operations
- while not holding buf_pool->mutex or block->mutex. */
+ /* Decompress the page while not holding
+ buf_pool->mutex or block->mutex. */
- ut_a(buf_zip_decompress(block, TRUE));
+ /* Page checksum verification is already done when
+ the page is read from disk. Hence page checksum
+ verification is not necessary when decompressing the page. */
+ ut_a(buf_zip_decompress(block, FALSE));
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
- ibuf_merge_or_delete_for_page(block, space, offset,
- zip_size, TRUE);
+ if (access_time) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(space, offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+ } else {
+ ibuf_merge_or_delete_for_page(
+ block, space, offset, zip_size, TRUE);
+ }
}
/* Unfix and unlatch the block. */
@@ -2723,8 +2771,9 @@ wait_until_unfixed:
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
#endif
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
- && ibuf_debug) {
+ && (ibuf_debug || buf_debug_execute_is_force_flush())) {
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
@@ -2759,19 +2808,18 @@ wait_until_unfixed:
buf_pool, space, offset, fold);
}
- if (UNIV_LIKELY_NULL(block)) {
- block_mutex = buf_page_get_mutex(
- &block->page);
- /* The page entered the buffer
- pool for some reason. Try to
- evict it again. */
- mutex_enter(block_mutex);
- rw_lock_x_unlock(hash_lock);
+ rw_lock_x_unlock(hash_lock);
- goto got_block;
+ if (UNIV_LIKELY_NULL(block)) {
+ /* Either the page has been read in or
+ a watch was set on that in the window
+ where we released the buf_pool::mutex
+ and before we acquire the hash_lock
+ above. Try again. */
+ guess = block;
+ goto loop;
}
- rw_lock_x_unlock(hash_lock);
fprintf(stderr,
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
@@ -2799,14 +2847,15 @@ wait_until_unfixed:
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
- mutex_exit(&block->mutex);
-
/* Check if this is the first access to the page */
-
access_time = buf_page_is_accessed(&block->page);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
- buf_page_set_accessed_make_young(&block->page, access_time);
+ buf_page_set_accessed(&block->page);
+
+ mutex_exit(&block->mutex);
+
+ if (mode != BUF_PEEK_IF_IN_POOL) {
+ buf_page_make_young_if_needed(&block->page);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2842,14 +2891,14 @@ wait_until_unfixed:
break;
case RW_S_LATCH:
- rw_lock_s_lock_func(&(block->lock), 0, file, line);
+ rw_lock_s_lock_inline(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_func(&(block->lock), 0, file, line);
+ rw_lock_x_lock_inline(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
break;
@@ -2857,7 +2906,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
+ if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -2912,15 +2961,13 @@ buf_page_optimistic_get(
buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
+ access_time = buf_page_is_accessed(&block->page);
- /* Check if this is the first access to the page.
- We do a dirty read on purpose, to avoid mutex contention.
- This field is only used for heuristic purposes; it does not
- affect correctness. */
+ buf_page_set_accessed(&block->page);
- access_time = buf_page_is_accessed(&block->page);
- buf_page_set_accessed_make_young(&block->page, access_time);
+ mutex_exit(&block->mutex);
+
+ buf_page_make_young_if_needed(&block->page);
ut_ad(!ibuf_inside(mtr)
|| ibuf_page(buf_block_get_space(block),
@@ -2932,8 +2979,8 @@ buf_page_optimistic_get(
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -2975,7 +3022,7 @@ buf_page_optimistic_get(
mutex_exit(&block->mutex);
#endif
- if (UNIV_UNLIKELY(!access_time)) {
+ if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -3038,24 +3085,14 @@ buf_page_get_known_nowait(
buf_block_buf_fix_inc(block, file, line);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
- if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(&block->page);
- buf_pool_mutex_exit(buf_pool);
- } else if (!buf_page_is_accessed(&block->page)) {
- /* Above, we do a dirty read on purpose, to avoid
- mutex contention. The field buf_page_t::access_time
- is only used for heuristic purposes. Writes to the
- field must be protected by mutex, however. */
- ulint time_ms = ut_time_ms();
-
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(&block->page, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ if (mode == BUF_MAKE_YOUNG) {
+ buf_page_make_young_if_needed(&block->page);
}
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
@@ -3065,8 +3102,8 @@ buf_page_get_known_nowait(
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -3167,8 +3204,8 @@ buf_page_try_get_func(
S-latch. */
fix_type = MTR_MEMO_PAGE_X_FIX;
- success = rw_lock_x_lock_func_nowait(&block->lock,
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&block->lock,
+ file, line);
}
if (!success) {
@@ -3234,6 +3271,7 @@ buf_page_init(
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
buf_block_t* block) /*!< in/out: block to init */
{
buf_page_t* hash_page;
@@ -3302,6 +3340,9 @@ buf_page_init(
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
+ if (zip_size) {
+ page_zip_set_size(&block->page.zip, zip_size);
+ }
}
/********************************************************************//**
@@ -3318,7 +3359,7 @@ UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
@@ -3407,7 +3448,7 @@ err_exit:
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
/* The block must be put to the LRU list, to the old blocks */
@@ -3426,8 +3467,6 @@ err_exit:
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (zip_size) {
- page_zip_set_size(&block->page.zip, zip_size);
-
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
@@ -3528,7 +3567,8 @@ err_exit:
rw_lock_x_unlock(hash_lock);
- /* The block must be put to the LRU list, to the old blocks */
+ /* The block must be put to the LRU list, to the old blocks.
+ The zip_size is already set into the page zip */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
@@ -3578,7 +3618,6 @@ buf_page_create(
buf_block_t* block;
ulint fold;
buf_block_t* free_block = NULL;
- ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
rw_lock_t* hash_lock;
@@ -3630,7 +3669,7 @@ buf_page_create(
mutex_enter(&block->mutex);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
@@ -3651,7 +3690,6 @@ buf_page_create(
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
- page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
@@ -3675,12 +3713,12 @@ buf_page_create(
rw_lock_x_unlock(&block->lock);
}
- buf_page_set_accessed(&block->page, time_ms);
-
buf_pool_mutex_exit(buf_pool);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
@@ -3849,6 +3887,8 @@ buf_mark_space_corrupt(
BUF_IO_READ);
}
+ mutex_exit(buf_page_get_mutex(bpage));
+
/* Find the table with specified space id, and mark it corrupted */
if (dict_set_corrupted_by_space(space)) {
buf_LRU_free_one_page(bpage);
@@ -3859,7 +3899,6 @@ buf_mark_space_corrupt(
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
return(ret);
@@ -3868,9 +3907,9 @@ buf_mark_space_corrupt(
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
-@return TRUE if successful */
+@return true if successful */
UNIV_INTERN
-ibool
+bool
buf_page_io_complete(
/*=================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
@@ -3952,8 +3991,20 @@ buf_page_io_complete(
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
- if (buf_page_is_corrupted(frame,
+ if (buf_page_is_corrupted(true, frame,
buf_page_get_zip_size(bpage))) {
+
+ /* Not a real corruption if it was triggered by
+ error injection */
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+ if (bpage->space > TRX_SYS_SPACE
+ && buf_mark_space_corrupt(bpage)) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Simulated page corruption");
+ return(true);
+ }
+ goto page_not_corrupt;
+ ;);
corrupt:
fprintf(stderr,
"InnoDB: Database page corruption on disk"
@@ -3997,7 +4048,7 @@ corrupt:
table as corrupted instead of crashing server */
if (bpage->space > TRX_SYS_SPACE
&& buf_mark_space_corrupt(bpage)) {
- return(FALSE);
+ return(false);
} else {
fputs("InnoDB: Ending processing"
" because of"
@@ -4008,6 +4059,9 @@ corrupt:
}
}
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+ page_not_corrupt: bpage = bpage; );
+
if (recv_recovery_is_on()) {
/* Pages must be uncompressed for crash recovery. */
ut_a(uncompressed);
@@ -4090,7 +4144,7 @@ corrupt:
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
- return(TRUE);
+ return(true);
}
/*********************************************************************//**
@@ -5118,9 +5172,7 @@ void
buf_refresh_io_stats_all(void)
/*==========================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -5137,9 +5189,7 @@ ibool
buf_all_freed(void)
/*===============*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index ad6ef7c4cef..fb853fe1543 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -25,16 +25,16 @@ Created 2011/12/19
#include "buf0dblwr.h"
+#ifdef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
#include "buf0buf.h"
-#include "buf0lru.h"
-#include "buf0flu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
#include "page0zip.h"
#include "trx0sys.h"
-#include "page0page.h"
-#include "mtr0log.h"
#ifndef UNIV_HOTBACKUP
@@ -195,22 +195,20 @@ start_again:
return;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Doublewrite buffer not found:"
- " creating new\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Doublewrite buffer not found: creating new");
if (buf_pool_get_curr_size()
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ FSP_EXTENT_SIZE / 2 + 100)
* UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your buffer pool size.\n"
- "InnoDB: Cannot continue operation.\n");
- exit(1);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your buffer pool size. Cannot continue "
+ "operation.");
+
+ exit(EXIT_FAILURE);
}
block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
@@ -223,16 +221,15 @@ start_again:
buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
if (block2 == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your tablespace size.\n"
- "InnoDB: Cannot continue operation.\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your tablespace size. "
+ "Cannot continue operation.");
/* We exit without committing the mtr to prevent
its modifications to the database getting to disk */
- exit(1);
+ exit(EXIT_FAILURE);
}
fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
@@ -243,15 +240,12 @@ start_again:
new_block = fseg_alloc_free_page(
fseg_header, prev_page_no + 1, FSP_UP, &mtr);
if (new_block == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite"
- " buffer: you must\n"
- "InnoDB: increase your"
- " tablespace size.\n"
- "InnoDB: Cannot continue operation.\n"
- );
-
- exit(1);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your tablespace size. "
+ "Cannot continue operation.");
+
+ exit(EXIT_FAILURE);
}
/* We read the allocated pages to the buffer pool;
@@ -331,8 +325,7 @@ start_again:
/* Remove doublewrite pages from LRU */
buf_pool_invalidate();
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Doublewrite buffer created\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created");
goto start_again;
}
@@ -391,7 +384,7 @@ buf_dblwr_init_or_restore_pages(
}
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
+ != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
/* We are upgrading from a version < 4.1.x to a version where
multiple tablespaces are supported. We must reset the space id
@@ -401,9 +394,8 @@ buf_dblwr_init_or_restore_pages(
reset_space_ids = TRUE;
- fprintf(stderr,
- "InnoDB: Resetting space id's in the"
- " doublewrite buffer\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Resetting space id's in the doublewrite buffer");
}
/* Read the pages from the doublewrite buffer to memory */
@@ -459,12 +451,11 @@ buf_dblwr_init_or_restore_pages(
} else if (!fil_check_adress_in_tablespace(space_id,
page_no)) {
- fprintf(stderr,
- "InnoDB: Warning: a page in the"
- " doublewrite buffer is not within space\n"
- "InnoDB: bounds; space id %lu"
- " page number %lu, page %lu in"
- " doublewrite buf.\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "A page in the doublewrite buffer is not "
+ "within space bounds; space id %lu "
+ "page number %lu, page %lu in "
+ "doublewrite buf.",
(ulong) space_id, (ulong) page_no, (ulong) i);
} else if (space_id == TRX_SYS_SPACE
@@ -489,8 +480,7 @@ buf_dblwr_init_or_restore_pages(
/* Check if the page is corrupt */
- if (UNIV_UNLIKELY
- (buf_page_is_corrupted(read_buf, zip_size))) {
+ if (buf_page_is_corrupted(true, read_buf, zip_size)) {
fprintf(stderr,
"InnoDB: Warning: database page"
@@ -501,7 +491,8 @@ buf_dblwr_init_or_restore_pages(
" the doublewrite buffer.\n",
(ulong) space_id, (ulong) page_no);
- if (buf_page_is_corrupted(page, zip_size)) {
+ if (buf_page_is_corrupted(true,
+ page, zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
buf_page_print(
@@ -538,9 +529,10 @@ buf_dblwr_init_or_restore_pages(
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page, NULL);
- fprintf(stderr,
- "InnoDB: Recovered the page from"
- " the doublewrite buffer.\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Recovered the page from"
+ " the doublewrite buffer.");
}
}
@@ -595,6 +587,7 @@ buf_dblwr_update(void)
ut_ad(buf_dblwr->batch_running);
ut_ad(buf_dblwr->b_reserved > 0);
+ ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
buf_dblwr->b_reserved--;
if (buf_dblwr->b_reserved == 0) {
@@ -705,23 +698,29 @@ static
void
buf_dblwr_write_block_to_datafile(
/*==============================*/
- const buf_block_t* block) /*!< in: block to write */
+ const buf_page_t* bpage) /*!< in: page to write */
{
- ut_a(block);
- ut_a(buf_page_in_file(&block->page));
+ ut_a(bpage);
+ ut_a(buf_page_in_file(bpage));
- if (block->page.zip.data) {
+ /* Increment the counter of I/O operations used
+ for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
+
+ if (bpage->zip.data) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(&block->page),
- buf_page_get_zip_size(&block->page),
- buf_page_get_page_no(&block->page), 0,
- buf_page_get_zip_size(&block->page),
- (void*) block->page.zip.data,
- (void*) block);
-
- goto exit;
+ FALSE, buf_page_get_space(bpage),
+ buf_page_get_zip_size(bpage),
+ buf_page_get_page_no(bpage), 0,
+ buf_page_get_zip_size(bpage),
+ (void*) bpage->zip.data,
+ (void*) bpage);
+
+ return;
}
+
+ const buf_block_t* block = (buf_block_t*) bpage;
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_dblwr_check_page_lsn(block->frame);
@@ -729,11 +728,6 @@ buf_dblwr_write_block_to_datafile(
FALSE, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*) block->frame, (void*) block);
-
-exit:
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
}
/********************************************************************//**
@@ -748,9 +742,8 @@ buf_dblwr_flush_buffered_writes(void)
/*=================================*/
{
byte* write_buf;
+ ulint first_free;
ulint len;
- ulint len2;
- ulint i;
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
@@ -782,10 +775,12 @@ try_again:
}
ut_a(!buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
/* Disallow anyone else to post to doublewrite buffer or to
start another batch of flushing. */
buf_dblwr->batch_running = TRUE;
+ first_free = buf_dblwr->first_free;
/* Now safe to release the mutex. Note that though no other
thread is allowed to post to the doublewrite batch flushing
@@ -795,7 +790,7 @@ try_again:
write_buf = buf_dblwr->write_buf;
- for (len2 = 0, i = 0;
+ for (ulint len2 = 0, i = 0;
i < buf_dblwr->first_free;
len2 += UNIV_PAGE_SIZE, i++) {
@@ -845,8 +840,8 @@ try_again:
flush:
/* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written += buf_dblwr->first_free;
- srv_dblwr_writes++;
+ srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
+ srv_stats.dblwr_writes.inc();
/* Now flush the doublewrite buffer data to disk */
fil_flush(TRX_SYS_SPACE);
@@ -855,11 +850,21 @@ flush:
and in recovery we will find them in the doublewrite buffer
blocks. Next do the writes to the intended positions. */
- for (i = 0; i < buf_dblwr->first_free; i++) {
- const buf_block_t* block = (buf_block_t*)
- buf_dblwr->buf_block_arr[i];
-
- buf_dblwr_write_block_to_datafile(block);
+ /* Up to this point first_free and buf_dblwr->first_free are
+ same because we have set the buf_dblwr->batch_running flag
+ disallowing any other thread to post any request but we
+ can't safely access buf_dblwr->first_free in the loop below.
+ This is so because it is possible that after we are done with
+ the last iteration and before we terminate the loop, the batch
+ gets finished in the IO helper thread and another thread posts
+ a new batch setting buf_dblwr->first_free to a higher value.
+ If this happens and we are using buf_dblwr->first_free in the
+ loop termination condition then we'll end up dispatching
+ the same block twice from two different threads. */
+ ut_ad(first_free == buf_dblwr->first_free);
+ for (ulint i = 0; i < first_free; i++) {
+ buf_dblwr_write_block_to_datafile(
+ buf_dblwr->buf_block_arr[i]);
}
/* Wake possible simulated aio thread to actually post the
@@ -935,6 +940,8 @@ try_again:
buf_dblwr->first_free++;
buf_dblwr->b_reserved++;
+ ut_ad(!buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size);
if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
@@ -1065,7 +1072,7 @@ retry:
/* We know that the write has been flushed to disk now
and during recovery we will find it in the doublewrite buffer
blocks. Next do the write to the intended position. */
- buf_dblwr_write_block_to_datafile((buf_block_t*) bpage);
+ buf_dblwr_write_block_to_datafile(bpage);
/* Sync the writes to the disk. */
buf_flush_sync_datafiles();
@@ -1077,8 +1084,8 @@ retry:
buf_dblwr->in_use[i] = FALSE;
/* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written += buf_dblwr->first_free;
- srv_dblwr_writes++;
+ srv_stats.dblwr_pages_written.inc();
+ srv_stats.dblwr_writes.inc();
mutex_exit(&(buf_dblwr->mutex));
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index 27757241c3e..467f817a2d1 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,14 +23,14 @@ Implements a buffer pool dump/load.
Created April 08, 2011 Vasil Dimov
*******************************************************/
+#include "univ.i"
+
#include <stdarg.h> /* va_* */
#include <string.h> /* strerror() */
-#include "univ.i"
-
#include "buf0buf.h" /* buf_pool_mutex_enter(), srv_buf_pool_instances */
#include "buf0dump.h"
-#include "db0err.h" /* enum db_err */
+#include "db0err.h"
#include "dict0dict.h" /* dict_operation_lock */
#include "os0file.h" /* OS_FILE_MAX_PATH */
#include "os0sync.h" /* os_event* */
@@ -40,7 +40,6 @@ Created April 08, 2011 Vasil Dimov
#include "sync0rw.h" /* rw_lock_s_lock() */
#include "ut0byte.h" /* ut_ull_create() */
#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
-#include "buf0rea.h" /* buf_read_page_async() */
enum status_severity {
STATUS_INFO,
@@ -579,6 +578,8 @@ DECLARE_THREAD(buf_dump_thread)(
void* arg __attribute__((unused))) /*!< in: a dummy parameter
required by os_thread_create */
{
+ ut_ad(!srv_read_only_mode);
+
srv_buf_dump_thread_active = TRUE;
buf_dump_status(STATUS_INFO, "not started");
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 023ed766c62..542c1669667 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -25,6 +25,10 @@ Created 11/11/1995 Heikki Tuuri
#include "buf0flu.h"
+#ifdef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
#include "buf0buf.h"
#include "buf0checksum.h"
#include "srv0start.h"
@@ -44,39 +48,6 @@ Created 11/11/1995 Heikki Tuuri
#include "srv0mon.h"
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
-#include "buf0dblwr.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-/**********************************************************************
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_flush_stat_update(). */
-#define BUF_FLUSH_STAT_N_INTERVAL 20
-
-/** Sampled values buf_flush_stat_cur.
-Not protected by any mutex. Updated by buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
-
-/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
-static ulint buf_flush_stat_arr_ind;
-
-/** Values at start of the current interval. Reset by
-buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_cur;
-
-/** Running sum of past values of buf_flush_stat_cur.
-Updated by buf_flush_stat_update(). Not protected by any mutex. */
-static buf_flush_stat_t buf_flush_stat_sum;
/** Number of pages flushed through non flush_list flushes. */
static ulint buf_lru_flush_page_count = 0;
@@ -104,6 +75,22 @@ in thrashing. */
/* @} */
+/******************************************************************//**
+Increases flush_list size in bytes with zip_size for compressed page,
+UNIV_PAGE_SIZE for uncompressed page in inline function */
+static inline
+void
+incr_flush_list_size_in_bytes(
+/*==========================*/
+ buf_block_t* block, /*!< in: control block */
+ buf_pool_t* buf_pool) /*!< in: buffer pool instance */
+{
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+ ulint zip_size = page_zip_get_size(&block->page.zip);
+ buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
+}
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@@ -333,6 +320,7 @@ buf_flush_insert_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+ incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
{
@@ -437,7 +425,7 @@ buf_flush_insert_sorted_into_flush_list(
prev_b, &block->page);
}
- MONITOR_INC(MONITOR_PAGE_INFLUSH);
+ incr_flush_list_size_in_bytes(block, buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
@@ -538,6 +526,7 @@ buf_flush_remove(
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -576,14 +565,15 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
bpage->oldest_modification = 0;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- MONITOR_DEC(MONITOR_PAGE_INFLUSH);
-
buf_flush_list_mutex_exit(buf_pool);
}
@@ -606,7 +596,7 @@ buf_flush_relocate_on_flush_list(
buf_page_t* dpage) /*!< in/out: destination block */
{
buf_page_t* prev;
- buf_page_t* prev_b = NULL;
+ buf_page_t* prev_b = NULL;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
@@ -710,6 +700,27 @@ buf_flush_write_complete(
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+ buf_frame_t* page, /*!< in/out: Page to update */
+ ulint zip_size, /*!< in: Compressed page size */
+ lsn_t lsn) /*!< in: Lsn to stamp on the page */
+{
+ ut_a(zip_size > 0);
+
+ ib_uint32_t checksum = page_zip_calc_checksum(
+ page, zip_size,
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
+
+ mach_write_to_8(page + FIL_PAGE_LSN, lsn);
+ memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+}
+
+/********************************************************************//**
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
@@ -747,17 +758,10 @@ buf_flush_init_for_writing(
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
case FIL_PAGE_INDEX:
- checksum = page_zip_calc_checksum(
- page_zip->data, zip_size,
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm));
-
- mach_write_to_8(page_zip->data
- + FIL_PAGE_LSN, newest_lsn);
- memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
- mach_write_to_4(page_zip->data
- + FIL_PAGE_SPACE_OR_CHKSUM,
- checksum);
+
+ buf_flush_update_zip_checksum(
+ page_zip->data, zip_size, newest_lsn);
+
return;
}
@@ -865,7 +869,7 @@ buf_flush_write_block_low(
#endif
#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
+ static ibool univ_log_debug_warned;
#endif /* UNIV_LOG_DEBUG */
ut_ad(buf_page_in_file(bpage));
@@ -949,15 +953,15 @@ os_aio_simulated_wake_handler_threads after we have posted a batch of
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function. */
-static
+UNIV_INTERN
void
buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_page_t* bpage, /*!< in: buffer control block */
- enum buf_flush flush_type) /*!< in: type of flush */
+ buf_flush flush_type) /*!< in: type of flush */
{
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool is_uncompressed;
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
@@ -1091,6 +1095,56 @@ buf_flush_page_try(
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/***********************************************************//**
+Check the page is in buffer pool and can be flushed.
+@return true if the page can be flushed. */
+static
+bool
+buf_flush_check_neighbor(
+/*=====================*/
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST */
+{
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ bool ret;
+
+ ut_ad(flush_type == BUF_FLUSH_LRU
+ || flush_type == BUF_FLUSH_LIST);
+
+ buf_pool_mutex_enter(buf_pool);
+
+ /* We only want to flush pages from this buffer pool. */
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+ if (!bpage) {
+
+ buf_pool_mutex_exit(buf_pool);
+ return(false);
+ }
+
+ ut_a(buf_page_in_file(bpage));
+
+ /* We avoid flushing 'non-old' blocks in an LRU flush,
+ because the flushed blocks are soon freed */
+
+ ret = false;
+ if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+ if (buf_flush_ready_for_flush(bpage, flush_type)) {
+ ret = true;
+ }
+ mutex_exit(block_mutex);
+ }
+ buf_pool_mutex_exit(buf_pool);
+
+ return(ret);
+}
+
+/***********************************************************//**
Flushes to disk all flushable pages within the flush area.
@return number of pages flushed */
static
@@ -1115,7 +1169,7 @@ buf_flush_try_neighbors(
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
- || !srv_flush_neighbors) {
+ || srv_flush_neighbors == 0) {
/* If there is little space or neighbor flushing is
not enabled then just flush the victim. */
low = offset;
@@ -1133,6 +1187,30 @@ buf_flush_try_neighbors(
low = (offset / buf_flush_area) * buf_flush_area;
high = (offset / buf_flush_area + 1) * buf_flush_area;
+
+ if (srv_flush_neighbors == 1) {
+ /* adjust 'low' and 'high' to limit
+ for contiguous dirty area */
+ if (offset > low) {
+ for (i = offset - 1;
+ i >= low
+ && buf_flush_check_neighbor(
+ space, i, flush_type);
+ i--) {
+ /* do nothing */
+ }
+ low = i + 1;
+ }
+
+ for (i = offset + 1;
+ i < high
+ && buf_flush_check_neighbor(
+ space, i, flush_type);
+ i++) {
+ /* do nothing */
+ }
+ high = i;
+ }
}
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
@@ -1181,7 +1259,7 @@ buf_flush_try_neighbors(
if (flush_type != BUF_FLUSH_LRU
|| i == offset
|| buf_page_is_old(bpage)) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
@@ -1240,7 +1318,7 @@ buf_flush_page_and_try_neighbors(
ulint* count) /*!< in/out: number of pages
flushed */
{
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool flushed = FALSE;
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -1374,7 +1452,7 @@ buf_flush_LRU_list_batch(
&& free_len < srv_LRU_scan_depth
&& lru_len > BUF_LRU_MIN_LEN) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ibool evict;
mutex_enter(block_mutex);
@@ -1576,8 +1654,7 @@ NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return number of blocks for which the write request was queued */
static
ulint
buf_flush_batch(
@@ -1621,8 +1698,6 @@ buf_flush_batch(
buf_pool_mutex_exit(buf_pool);
- buf_dblwr_flush_buffered_writes();
-
#ifdef UNIV_DEBUG
if (buf_debug_prints && count > 0) {
fprintf(stderr, flush_type == BUF_FLUSH_LRU
@@ -1632,8 +1707,6 @@ buf_flush_batch(
}
#endif /* UNIV_DEBUG */
- srv_buf_pool_flushed += count;
-
return(count);
}
@@ -1659,14 +1732,7 @@ buf_flush_common(
}
#endif /* UNIV_DEBUG */
- srv_buf_pool_flushed += page_count;
-
- if (flush_type == BUF_FLUSH_LRU) {
- /* We keep track of all flushes happening as part of LRU
- flush. When estimating the desired rate at which flush_list
- should be flushed we factor in this value. */
- buf_lru_flush_page_count += page_count;
- }
+ srv_stats.buf_pool_flushed.add(page_count);
}
/******************************************************************//**
@@ -1750,7 +1816,7 @@ buf_flush_wait_batch_end(
}
} else {
thd_wait_begin(NULL, THD_WAIT_DISKIO);
- os_event_wait(buf_pool->no_flush[type]);
+ os_event_wait(buf_pool->no_flush[type]);
thd_wait_end(NULL);
}
}
@@ -1760,21 +1826,28 @@ This utility flushes dirty blocks from the end of the LRU list and also
puts replaceable clean pages from the end of the LRU list to the free
list.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully. false if another batch
+of same type was already running. */
static
-ulint
+bool
buf_flush_LRU(
/*==========*/
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- ulint min_n) /*!< in: wished minimum mumber of blocks
+ ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
+ ulint* n_processed) /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
{
ulint page_count;
+ if (n_processed) {
+ *n_processed = 0;
+ }
+
if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
- return(ULINT_UNDEFINED);
+ return(false);
}
page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
@@ -1783,31 +1856,43 @@ buf_flush_LRU(
buf_flush_common(BUF_FLUSH_LRU, page_count);
- return(page_count);
+ if (n_processed) {
+ *n_processed = page_count;
+ }
+
+ return(true);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instance */
UNIV_INTERN
-ulint
+bool
buf_flush_list(
/*===========*/
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- lsn_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
+ lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
+ ulint* n_processed) /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
+
{
ulint i;
- ulint total_page_count = 0;
- ibool skipped = FALSE;
+ bool success = true;
+
+ if (n_processed) {
+ *n_processed = 0;
+ }
if (min_n != ULINT_MAX) {
/* Ensure that flushing is spread evenly amongst the
@@ -1836,7 +1921,7 @@ buf_flush_list(
pools based on the assumption that it will
help in the retry which will follow the
failure. */
- skipped = TRUE;
+ success = false;
continue;
}
@@ -1848,7 +1933,9 @@ buf_flush_list(
buf_flush_common(BUF_FLUSH_LIST, page_count);
- total_page_count += page_count;
+ if (n_processed) {
+ *n_processed += page_count;
+ }
if (page_count) {
MONITOR_INC_VALUE_CUMULATIVE(
@@ -1859,8 +1946,7 @@ buf_flush_list(
}
}
- return(lsn_limit != LSN_MAX && skipped
- ? ULINT_UNDEFINED : total_page_count);
+ return(success);
}
/******************************************************************//**
@@ -1879,7 +1965,7 @@ buf_flush_single_page_from_LRU(
{
ulint scanned;
buf_page_t* bpage;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool freed;
ibool evict_zip;
@@ -1957,128 +2043,6 @@ buf_flush_single_page_from_LRU(
return(freed);
}
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval.
-Flush rate heuristic depends on (a) rate of redo log generation and
-(b) the rate at which LRU flush is happening. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void)
-/*=======================*/
-{
- buf_flush_stat_t* item;
- lsn_t lsn_diff;
- lsn_t lsn;
- ulint n_flushed;
-
- lsn = log_get_lsn();
- if (buf_flush_stat_cur.redo == 0) {
- /* First time around. Just update the current LSN
- and return. */
- buf_flush_stat_cur.redo = lsn;
- return;
- }
-
- item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
-
- /* values for this interval */
- lsn_diff = lsn - buf_flush_stat_cur.redo;
- n_flushed = buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed;
-
- /* add the current value and subtract the obsolete entry. */
- buf_flush_stat_sum.redo += lsn_diff - item->redo;
- buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
-
- /* put current entry in the array. */
- item->redo = lsn_diff;
- item->n_flushed = n_flushed;
-
- /* update the index */
- buf_flush_stat_arr_ind++;
- buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
-
- /* reset the current entry. */
- buf_flush_stat_cur.redo = lsn;
- buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
-}
-
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at a significant rate without corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
-static
-ulint
-buf_flush_get_desired_flush_rate(void)
-/*==================================*/
-{
- ulint i;
- lsn_t redo_avg;
- ulint n_dirty = 0;
- ib_uint64_t n_flush_req;
- ib_uint64_t lru_flush_avg;
- lsn_t lsn = log_get_lsn();
- lsn_t log_capacity = log_get_capacity();
-
- /* log_capacity should never be zero after the initialization
- of log subsystem. */
- ut_ad(log_capacity != 0);
-
- /* Get total number of dirty pages. It is OK to access
- flush_list without holding any mutex as we are using this
- only for heuristics. */
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
- }
-
- /* An overflow can happen if we generate more than 2^32 bytes
- of redo in this interval i.e.: 4G of redo in 1 second. We can
- safely consider this as infinity because if we ever come close
- to 4G we'll start a synchronous flush of dirty pages. */
- /* redo_avg below is average at which redo is generated in
- past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
- interval. */
- redo_avg = buf_flush_stat_sum.redo / BUF_FLUSH_STAT_N_INTERVAL
- + (lsn - buf_flush_stat_cur.redo);
-
- /* An overflow can happen possibly if we flush more than 2^32
- pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
- unlikely scenario. Even when this happens it means that our
- flush rate will be off the mark. It won't affect correctness
- of any subsystem. */
- /* lru_flush_avg below is rate at which pages are flushed as
- part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
- number of pages flushed in the current interval. */
- lru_flush_avg = buf_flush_stat_sum.n_flushed
- / BUF_FLUSH_STAT_N_INTERVAL
- + (buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed);
-
- n_flush_req = (n_dirty * redo_avg) / log_capacity;
-
- /* The number of pages that we want to flush from the flush
- list is the difference between the required rate and the
- number of pages that we are historically flushing from the
- LRU list */
- if (n_flush_req <= lru_flush_avg) {
- return(0);
- } else {
- ib_uint64_t rate;
-
- rate = n_flush_req - lru_flush_avg;
-
- return((ulint) (rate < PCT_IO(100) ? rate : PCT_IO(100)));
- }
-}
-
/*********************************************************************//**
Clears up tail of the LRU lists:
* Put replaceable pages at the tail of LRU to the free list
@@ -2086,36 +2050,35 @@ Clears up tail of the LRU lists:
The depth to which we scan each buffer pool is controlled by dynamic
config parameter innodb_LRU_scan_depth.
@return total pages flushed */
-UNIV_INLINE
+UNIV_INTERN
ulint
-page_cleaner_flush_LRU_tail(void)
-/*=============================*/
+buf_flush_LRU_tail(void)
+/*====================*/
{
- ulint i;
- ulint j;
ulint total_flushed = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
/* We divide LRU flush into smaller chunks because
there may be user threads waiting for the flush to
end in buf_LRU_get_free_block(). */
- for (j = 0;
+ for (ulint j = 0;
j < srv_LRU_scan_depth;
j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {
- ulint n_flushed = buf_flush_LRU(buf_pool,
- PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE);
+ ulint n_flushed = 0;
/* Currently page_cleaner is the only thread
that can trigger an LRU flush. It is possible
that a batch triggered during last iteration is
still running, */
- if (n_flushed != ULINT_UNDEFINED) {
- total_flushed += n_flushed;
- }
+ buf_flush_LRU(buf_pool,
+ PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
+ &n_flushed);
+
+ total_flushed += n_flushed;
}
}
@@ -2132,14 +2095,12 @@ page_cleaner_flush_LRU_tail(void)
/*********************************************************************//**
Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INLINE
+UNIV_INTERN
void
-page_cleaner_wait_LRU_flush(void)
-/*=============================*/
+buf_flush_wait_LRU_batch_end(void)
+/*==============================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -2166,22 +2127,87 @@ ulint
page_cleaner_do_flush_batch(
/*========================*/
ulint n_to_flush, /*!< in: number of pages that
- we should attempt to flush. If
- an lsn_limit is provided then
- this value will have no affect */
+ we should attempt to flush. */
lsn_t lsn_limit) /*!< in: LSN up to which flushing
must happen */
{
ulint n_flushed;
- ut_ad(n_to_flush == ULINT_MAX || lsn_limit == LSN_MAX);
+ buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
+
+ return(n_flushed);
+}
- n_flushed = buf_flush_list(n_to_flush, lsn_limit);
- if (n_flushed == ULINT_UNDEFINED) {
- n_flushed = 0;
+/*********************************************************************//**
+Calculates if flushing is required based on number of dirty pages in
+the buffer pool.
+@return percent of io_capacity to flush to manage dirty page ratio */
+static
+ulint
+af_get_pct_for_dirty()
+/*==================*/
+{
+ ulint dirty_pct = buf_get_modified_ratio_pct();
+
+ ut_a(srv_max_dirty_pages_pct_lwm
+ <= srv_max_buf_pool_modified_pct);
+
+ if (srv_max_dirty_pages_pct_lwm == 0) {
+ /* The user has not set the option to preflush dirty
+ pages as we approach the high water mark. */
+ if (dirty_pct > srv_max_buf_pool_modified_pct) {
+ /* We have crossed the high water mark of dirty
+ pages In this case we start flushing at 100% of
+ innodb_io_capacity. */
+ return(100);
+ }
+ } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
+ /* We should start flushing pages gradually. */
+ return((dirty_pct * 100)
+ / (srv_max_buf_pool_modified_pct + 1));
}
- return(n_flushed);
+ return(0);
+}
+
+/*********************************************************************//**
+Calculates if flushing is required based on redo generation rate.
+@return percent of io_capacity to flush to manage redo space */
+static
+ulint
+af_get_pct_for_lsn(
+/*===============*/
+ lsn_t age) /*!< in: current age of LSN. */
+{
+ lsn_t max_async_age;
+ lsn_t lsn_age_factor;
+ lsn_t af_lwm = (srv_adaptive_flushing_lwm
+ * log_get_capacity()) / 100;
+
+ if (age < af_lwm) {
+ /* No adaptive flushing. */
+ return(0);
+ }
+
+ max_async_age = log_get_max_modified_age_async();
+
+ if (age < max_async_age && !srv_adaptive_flushing) {
+ /* We have still not reached the max_async point and
+ the user has disabled adaptive flushing. */
+ return(0);
+ }
+
+ /* If we are here then we know that either:
+ 1) User has enabled adaptive flushing
+ 2) User may have disabled adaptive flushing but we have reached
+ max_async_age. */
+ lsn_age_factor = (age * 100) / max_async_age;
+
+ ut_ad(srv_max_io_capacity >= srv_io_capacity);
+ return(static_cast<ulint>(
+ ((srv_max_io_capacity / srv_io_capacity)
+ * (lsn_age_factor * sqrt((double)lsn_age_factor)))
+ / 7.5));
}
/*********************************************************************//**
@@ -2195,78 +2221,103 @@ ulint
page_cleaner_flush_pages_if_needed(void)
/*====================================*/
{
- ulint n_pages_flushed = 0;
- lsn_t lsn_limit = log_async_flush_lsn();
+ static lsn_t lsn_avg_rate = 0;
+ static lsn_t prev_lsn = 0;
+ static lsn_t last_lsn = 0;
+ static ulint sum_pages = 0;
+ static ulint last_pages = 0;
+ static ulint prev_pages = 0;
+ static ulint avg_page_rate = 0;
+ static ulint n_iterations = 0;
+ lsn_t oldest_lsn;
+ lsn_t cur_lsn;
+ lsn_t age;
+ lsn_t lsn_rate;
+ ulint n_pages = 0;
+ ulint pct_for_dirty = 0;
+ ulint pct_for_lsn = 0;
+ ulint pct_total = 0;
+ int age_factor = 0;
+
+ cur_lsn = log_get_lsn();
+
+ if (prev_lsn == 0) {
+ /* First time around. */
+ prev_lsn = cur_lsn;
+ return(0);
+ }
- /* Currently we decide whether or not to flush and how much to
- flush based on three factors.
+ if (prev_lsn == cur_lsn) {
+ return(0);
+ }
- 1) If the amount of LSN for which pages are not flushed to disk
- yet is greater than log_sys->max_modified_age_async. This is
- the most urgent type of flush and we attempt to cleanup enough
- of the tail of the flush_list to avoid flushing inside user
- threads.
+ /* We update our variables every srv_flushing_avg_loops
+ iterations to smooth out transitions in the workload. */
+ if (++n_iterations >= srv_flushing_avg_loops) {
- 2) If modified page ratio is greater than the one specified by
- the user. In that case we flush full 100% IO_CAPACITY of the
- server. Note that 1 and 2 are not mutually exclusive. We can
- end up executing both steps.
+ avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
+ + avg_page_rate) / 2;
- 3) If adaptive_flushing is set by the user and neither of 1
- or 2 has occurred above then we flush a batch based on our
- heuristics. */
+ /* How much LSN we have generated since last call. */
+ lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;
- if (lsn_limit != LSN_MAX) {
+ lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
- /* async flushing is requested */
- n_pages_flushed = page_cleaner_do_flush_batch(ULINT_MAX,
- lsn_limit);
+ prev_lsn = cur_lsn;
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_PAGES,
- n_pages_flushed);
+ n_iterations = 0;
+
+ sum_pages = 0;
}
- if (UNIV_UNLIKELY(n_pages_flushed < PCT_IO(100)
- && buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
+ oldest_lsn = buf_pool_get_oldest_modification();
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
+ ut_ad(oldest_lsn <= cur_lsn);
- n_pages_flushed += page_cleaner_do_flush_batch(PCT_IO(100),
- LSN_MAX);
+ age = cur_lsn - oldest_lsn;
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_PAGES,
- n_pages_flushed);
+ pct_for_dirty = af_get_pct_for_dirty();
+ pct_for_lsn = af_get_pct_for_lsn(age);
+
+ pct_total = ut_max(pct_for_dirty, pct_for_lsn);
+
+ /* Cap the maximum IO capacity that we are going to use by
+ max_io_capacity. */
+ n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2;
+
+ if (n_pages > srv_max_io_capacity) {
+ n_pages = srv_max_io_capacity;
}
- if (srv_adaptive_flushing && n_pages_flushed == 0) {
+ if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
+ age_factor = prev_pages / last_pages;
+ }
- /* Try to keep the rate of flushing of dirty
- pages such that redo log generation does not
- produce bursts of IO at checkpoint time. */
- ulint n_flush = buf_flush_get_desired_flush_rate();
+ MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);
- ut_ad(n_flush <= PCT_IO(100));
- if (n_flush) {
- n_pages_flushed = page_cleaner_do_flush_batch(
- n_flush, LSN_MAX);
+ prev_pages = n_pages;
+ n_pages = page_cleaner_do_flush_batch(
+ n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_pages_flushed);
- }
+ last_lsn= cur_lsn;
+ last_pages= n_pages + 1;
+
+ MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
+ MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
+
+ if (n_pages) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ n_pages);
+
+ sum_pages += n_pages;
}
- return(n_pages_flushed);
+ return(n_pages);
}
/*********************************************************************//**
@@ -2306,7 +2357,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
ulint next_loop_time = ut_time_ms() + 1000;
ulint n_flushed = 0;
ulint last_activity = srv_get_activity_count();
- ulint i;
+
+ ut_ad(!srv_read_only_mode);
#ifdef UNIV_PFS_THREAD
pfs_register_thread(buf_page_cleaner_thread_key);
@@ -2336,7 +2388,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
last_activity = srv_get_activity_count();
/* Flush pages from end of LRU if required */
- n_flushed = page_cleaner_flush_LRU_tail();
+ n_flushed = buf_flush_LRU_tail();
/* Flush pages from flush_list if required */
n_flushed += page_cleaner_flush_pages_if_needed();
@@ -2396,19 +2448,21 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
sweep and we'll come out of the loop leaving behind dirty pages
in the flush_list */
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- page_cleaner_wait_LRU_flush();
+ buf_flush_wait_LRU_batch_end();
+
+ bool success;
do {
- n_flushed = buf_flush_list(PCT_IO(100), LSN_MAX);
+ success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- } while (n_flushed > 0);
+ } while (!success || n_flushed > 0);
/* Some sanity checks */
ut_a(srv_get_active_thread_type() == SRV_NONE);
ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0);
}
@@ -2521,3 +2575,66 @@ buf_flush_validate(
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool */
+ ulint id) /*!< in: space id to check */
+
+{
+ ulint count = 0;
+
+ buf_pool_mutex_enter(buf_pool);
+ buf_flush_list_mutex_enter(buf_pool);
+
+ buf_page_t* bpage;
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+ bpage != 0;
+ bpage = UT_LIST_GET_NEXT(list, bpage)) {
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_flush_list);
+ ut_ad(bpage->oldest_modification > 0);
+
+ if (buf_page_get_space(bpage) == id) {
+ ++count;
+ }
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(count);
+}
+
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return number of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+ ulint id) /*!< in: space id to check */
+
+{
+ ulint count = 0;
+
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+
+ count += buf_pool_get_dirty_pages_count(buf_pool, id);
+ }
+
+ return(count);
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index c35d84cb985..270263d95f1 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -51,6 +51,9 @@ Created 11/5/1995 Heikki Tuuri
#include "log0recv.h"
#include "srv0srv.h"
#include "srv0mon.h"
+#include "lock0lock.h"
+
+#include "ha_prototypes.h"
/** The number of blocks from the LRU_old pointer onward, including
the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
@@ -158,6 +161,22 @@ buf_LRU_block_free_hashed_page(
be in a state where it can be freed */
/******************************************************************//**
+Increases the LRU size in bytes by zip_size for a compressed page,
+or by UNIV_PAGE_SIZE for an uncompressed page. */
+static inline
+void
+incr_LRU_size_in_bytes(
+/*===================*/
+ buf_page_t* bpage, /*!< in: control block */
+ buf_pool_t* buf_pool) /*!< in: buffer pool instance */
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ulint zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
+}
+
+/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
@return TRUE if should use unzip_LRU */
@@ -342,39 +361,338 @@ next_page:
}
/******************************************************************//**
+While flushing (or removing dirty) pages from a tablespace we don't
+want to hog the CPU and resources. Release the buffer pool and block
+mutex and try to force a context switch. Then reacquire the same mutexes.
+The current page is "fixed" before the release of the mutexes and then
+"unfixed" again once we have reacquired the mutexes. */
+static __attribute__((nonnull))
+void
+buf_flush_yield(
+/*============*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage) /*!< in/out: current page */
+{
+ ib_mutex_t* block_mutex;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_page_in_file(bpage));
+
+ block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+ /* "Fix" the block so that the position cannot be
+ changed after we release the buffer pool and
+ block mutexes. */
+ buf_page_set_sticky(bpage);
+
+ /* Now it is safe to release the buf_pool->mutex. */
+ buf_pool_mutex_exit(buf_pool);
+
+ mutex_exit(block_mutex);
+ /* Try and force a context switch. */
+ os_thread_yield();
+
+ buf_pool_mutex_enter(buf_pool);
+
+ mutex_enter(block_mutex);
+ /* "Unfix" the block now that we have both the
+ buffer pool and block mutex again. */
+ buf_page_unset_sticky(bpage);
+ mutex_exit(block_mutex);
+}
+
+/******************************************************************//**
+If we have hogged the resources for too long then release the buffer
+pool and flush list mutex and do a thread yield. Set the current page
+to "sticky" so that it is not relocated during the yield.
+@return true if yielded */
+static __attribute__((nonnull(1), warn_unused_result))
+bool
+buf_flush_try_yield(
+/*================*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage, /*!< in/out: bpage to remove */
+ ulint processed) /*!< in: number of pages processed */
+{
+ /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
+ loop we release buf_pool->mutex to let other threads
+ do their job but only if the block is not IO fixed. This
+ ensures that the block stays in its position in the
+ flush_list. */
+
+ if (bpage != NULL
+ && processed >= BUF_LRU_DROP_SEARCH_SIZE
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ /* Release the buffer pool and block mutex
+ to give the other threads a go. */
+
+ buf_flush_yield(buf_pool, bpage);
+
+ buf_flush_list_mutex_enter(buf_pool);
+
+ /* Should not have been removed from the flush
+ list during the yield. However, this check is
+ not sufficient to catch a remove -> add. */
+
+ ut_ad(bpage->in_flush_list);
+
+ return(true);
+ }
+
+ return(false);
+}
+
+/******************************************************************//**
+Removes a single page from a given tablespace inside a specific
+buffer pool instance.
+@return true if page was removed. */
+static __attribute__((nonnull, warn_unused_result))
+bool
+buf_flush_or_remove_page(
+/*=====================*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage, /*!< in/out: bpage to remove */
+ bool flush) /*!< in: flush to disk if true but
+ don't remove else remove without
+ flushing to disk */
+{
+ ib_mutex_t* block_mutex;
+ bool processed = false;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+ block_mutex = buf_page_get_mutex(bpage);
+
+ /* bpage->space and bpage->io_fix are protected by
+ buf_pool->mutex and block_mutex. It is safe to check
+ them while holding buf_pool->mutex only. */
+
+ if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+
+ /* We cannot remove this page during this scan
+ yet; maybe the system is currently reading it
+ in, or flushing the modifications to the file */
+
+ } else {
+
+ /* We have to release the flush_list_mutex to obey the
+ latching order. We are however guaranteed that the page
+ will stay in the flush_list because buf_flush_remove()
+ needs buf_pool->mutex as well (for the non-flush case). */
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ mutex_enter(block_mutex);
+
+ ut_ad(bpage->oldest_modification != 0);
+
+ if (bpage->buf_fix_count > 0) {
+
+ mutex_exit(block_mutex);
+
+ /* We cannot remove this page yet;
+ maybe the system is currently reading
+ it in, or flushing the modifications
+ to the file */
+
+ } else if (!flush) {
+
+ buf_flush_remove(bpage);
+
+ mutex_exit(block_mutex);
+
+ processed = true;
+
+ } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+
+ /* Check the status again after releasing the flush
+ list mutex and acquiring the block mutex. The background
+ flush thread may be in the process of flushing this
+ page when we released the flush list mutex. */
+
+ /* The following call will release the buffer pool
+ and block mutex. */
+ buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE);
+
+ /* Wake possible simulated aio thread to actually
+ post the writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+
+ buf_pool_mutex_enter(buf_pool);
+
+ processed = true;
+ } else {
+ mutex_exit(block_mutex);
+ }
+
+ buf_flush_list_mutex_enter(buf_pool);
+ }
+
+ ut_ad(!mutex_own(block_mutex));
+
+ return(processed);
+}
+
+/******************************************************************//**
Remove all dirty pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from
-the list as they age towards the tail of the LRU. */
-static
+the list as they age towards the tail of the LRU.
+@retval DB_SUCCESS if all freed
+@retval DB_FAIL if not all freed
+@retval DB_INTERRUPTED if the transaction was interrupted */
+static __attribute__((nonnull(1), warn_unused_result))
+dberr_t
+buf_flush_or_remove_pages(
+/*======================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: target space id for which
+ to remove or flush pages */
+ bool flush, /*!< in: flush to disk if true but
+ don't remove else remove without
+ flushing to disk */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted, can be 0 */
+{
+ buf_page_t* prev;
+ buf_page_t* bpage;
+ ulint processed = 0;
+ bool all_freed = true;
+
+ buf_flush_list_mutex_enter(buf_pool);
+
+ for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ bpage != NULL;
+ bpage = prev) {
+
+ ut_a(buf_page_in_file(bpage));
+
+ /* Save the previous link because once we free the
+ page we can't rely on the links. */
+
+ prev = UT_LIST_GET_PREV(list, bpage);
+
+ if (buf_page_get_space(bpage) != id) {
+
+ /* Skip this block, as it does not belong to
+ the target space. */
+
+ } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) {
+
+ /* Remove was unsuccessful, we have to try again
+ by scanning the entire list from the end. */
+
+ all_freed = false;
+ }
+
+ ++processed;
+
+ /* Yield if we have hogged the CPU and mutexes for too long. */
+ if (buf_flush_try_yield(buf_pool, prev, processed)) {
+
+ /* Reset the batch size counter if we had to yield. */
+
+ processed = 0;
+ }
+
+#ifdef DBUG_OFF
+ if (flush) {
+ DBUG_EXECUTE_IF("ib_export_flush_crash",
+ static ulint n_pages;
+ if (++n_pages == 4) {DBUG_SUICIDE();});
+ }
+#endif /* DBUG_OFF */
+
+ /* The check whether trx is interrupted is expensive; we want
+ to perform it only every N iterations. */
+ if (!processed && trx && trx_is_interrupted(trx)) {
+ buf_flush_list_mutex_exit(buf_pool);
+ return(DB_INTERRUPTED);
+ }
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ return(all_freed ? DB_SUCCESS : DB_FAIL);
+}
+
+/******************************************************************//**
+Remove or flush all the dirty pages that belong to a given tablespace
+inside a specific buffer pool instance. The pages will remain in the LRU
+list and will be evicted from the LRU list as they age and move towards
+the tail of the LRU list. */
+static __attribute__((nonnull(1)))
void
-buf_LRU_remove_dirty_pages_for_tablespace(
-/*======================================*/
+buf_flush_dirty_pages(
+/*==================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: space id */
+ bool flush, /*!< in: flush to disk if true otherwise
+ remove the pages without flushing */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
+{
+ dberr_t err;
+
+ do {
+ buf_pool_mutex_enter(buf_pool);
+
+ err = buf_flush_or_remove_pages(buf_pool, id, flush, trx);
+
+ buf_pool_mutex_exit(buf_pool);
+
+ ut_ad(buf_flush_validate(buf_pool));
+
+ if (err == DB_FAIL) {
+ os_thread_sleep(20000);
+ }
+
+ /* DB_FAIL is a soft error, it means that the task wasn't
+ completed, needs to be retried. */
+
+ ut_ad(buf_flush_validate(buf_pool));
+
+ } while (err == DB_FAIL);
+}
+
+/******************************************************************//**
+Remove all pages that belong to a given tablespace inside a specific
+buffer pool instance when we are DISCARDing the tablespace. */
+static __attribute__((nonnull))
+void
+buf_LRU_remove_all_pages(
+/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */
{
buf_page_t* bpage;
ibool all_freed;
- ulint i;
scan_again:
buf_pool_mutex_enter(buf_pool);
- buf_flush_list_mutex_enter(buf_pool);
all_freed = TRUE;
- for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list), i = 0;
- bpage != NULL; ++i) {
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage != NULL;
+ /* No op */) {
+ rw_lock_t* hash_lock;
buf_page_t* prev_bpage;
- mutex_t* block_mutex = NULL;
+ ib_mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
- prev_bpage = UT_LIST_GET_PREV(list, bpage);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* bpage->space and bpage->io_fix are protected by
- buf_pool->mutex and block_mutex. It is safe to check
+ buf_pool->mutex and the block_mutex. It is safe to check
them while holding buf_pool->mutex only. */
if (buf_page_get_space(bpage) != id) {
@@ -388,83 +706,103 @@ scan_again:
all_freed = FALSE;
goto next_page;
- }
+ } else {
+ ulint fold = buf_page_address_fold(
+ bpage->space, bpage->offset);
- /* We have to release the flush_list_mutex to obey the
- latching order. We are however guaranteed that the page
- will stay in the flush_list because buf_flush_remove()
- needs buf_pool->mutex as well. */
- buf_flush_list_mutex_exit(buf_pool);
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
+ hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- if (bpage->buf_fix_count > 0) {
- mutex_exit(block_mutex);
- buf_flush_list_mutex_enter(buf_pool);
+ rw_lock_x_lock(hash_lock);
- /* We cannot remove this page during
- this scan yet; maybe the system is
- currently reading it in, or flushing
- the modifications to the file */
+ block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
- all_freed = FALSE;
- goto next_page;
- }
+ if (bpage->buf_fix_count > 0) {
- ut_ad(bpage->oldest_modification != 0);
+ mutex_exit(block_mutex);
- buf_flush_remove(bpage);
+ rw_lock_x_unlock(hash_lock);
- mutex_exit(block_mutex);
- buf_flush_list_mutex_enter(buf_pool);
-next_page:
- bpage = prev_bpage;
+ /* We cannot remove this page during
+ this scan yet; maybe the system is
+ currently reading it in, or flushing
+ the modifications to the file */
- if (!bpage) {
- break;
+ all_freed = FALSE;
+
+ goto next_page;
+ }
}
- /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
- loop we release buf_pool->mutex to let other threads
- do their job. */
- if (i < BUF_LRU_DROP_SEARCH_SIZE) {
- continue;
+ ut_ad(mutex_own(block_mutex));
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Dropping space %lu page %lu\n",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
}
+#endif
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ /* Do nothing, because the adaptive hash index
+ covers uncompressed pages only. */
+ } else if (((buf_block_t*) bpage)->index) {
+ ulint page_no;
+ ulint zip_size;
- /* We IO-fix the block to make sure that the block
- stays in its position in the flush_list. */
- if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- /* Block is already IO-fixed. We don't
- want to change the value. Lets leave
- this block alone. */
- continue;
+ buf_pool_mutex_exit(buf_pool);
+
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
+
+ rw_lock_x_unlock(hash_lock);
+
+ mutex_exit(block_mutex);
+
+ /* Note that the following call will acquire
+ and release block->lock X-latch. */
+
+ btr_search_drop_page_hash_when_freed(
+ id, zip_size, page_no);
+
+ goto scan_again;
}
- buf_flush_list_mutex_exit(buf_pool);
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
- buf_page_set_sticky(bpage);
- mutex_exit(block_mutex);
+ if (bpage->oldest_modification != 0) {
- /* Now it is safe to release the buf_pool->mutex. */
- buf_pool_mutex_exit(buf_pool);
- os_thread_yield();
- buf_pool_mutex_enter(buf_pool);
+ buf_flush_remove(bpage);
+ }
- mutex_enter(block_mutex);
- buf_page_unset_sticky(bpage);
- mutex_exit(block_mutex);
+ ut_ad(!bpage->in_flush_list);
- buf_flush_list_mutex_enter(buf_pool);
- ut_ad(bpage->in_flush_list);
+ /* Remove from the LRU list. */
- i = 0;
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+
+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+
+ } else {
+ /* The block_mutex should have been released
+ by buf_LRU_block_remove_hashed_page() when it
+ returns BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ }
+
+ ut_ad(!mutex_own(block_mutex));
+
+#ifdef UNIV_SYNC_DEBUG
+ /* buf_LRU_block_remove_hashed_page() releases the hash_lock */
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+next_page:
+ bpage = prev_bpage;
}
buf_pool_mutex_exit(buf_pool);
- buf_flush_list_mutex_exit(buf_pool);
-
- ut_ad(buf_flush_validate(buf_pool));
if (!all_freed) {
os_thread_sleep(20000);
@@ -474,15 +812,60 @@ next_page:
}
/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. */
+Remove pages belonging to a given tablespace inside a specific
+buffer pool instance when we are deleting the data file(s) of that
+tablespace. The pages still remain a part of LRU and are evicted from
+the list as they age towards the tail of the LRU only if buf_remove
+is BUF_REMOVE_FLUSH_NO_WRITE. */
+static __attribute__((nonnull(1)))
+void
+buf_LRU_remove_pages(
+/*=================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
+{
+ switch (buf_remove) {
+ case BUF_REMOVE_ALL_NO_WRITE:
+ buf_LRU_remove_all_pages(buf_pool, id);
+ break;
+
+ case BUF_REMOVE_FLUSH_NO_WRITE:
+ ut_a(trx == 0);
+ buf_flush_dirty_pages(buf_pool, id, false, NULL);
+ ut_ad(trx_is_interrupted(trx)
+ || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
+ break;
+
+ case BUF_REMOVE_FLUSH_WRITE:
+ ut_a(trx != 0);
+ buf_flush_dirty_pages(buf_pool, id, true, trx);
+ ut_ad(trx_is_interrupted(trx)
+ || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
+ /* Ensure that all asynchronous IO is completed. */
+ os_aio_wait_until_no_pending_writes();
+ fil_flush(id);
+ break;
+ }
+}
+
+/******************************************************************//**
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
UNIV_INTERN
void
-buf_LRU_invalidate_tablespace(
+buf_LRU_flush_or_remove_pages(
/*==========================*/
- ulint id) /*!< in: space id */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
{
- ulint i;
+ ulint i;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
@@ -494,9 +877,28 @@ buf_LRU_invalidate_tablespace(
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
- buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
- buf_LRU_remove_dirty_pages_for_tablespace(buf_pool, id);
+
+ switch (buf_remove) {
+ case BUF_REMOVE_ALL_NO_WRITE:
+ case BUF_REMOVE_FLUSH_NO_WRITE:
+ buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
+ break;
+
+ case BUF_REMOVE_FLUSH_WRITE:
+ /* We allow read-only queries against the
+ table, there is no need to drop the AHI entries. */
+ break;
+ }
+
+ buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
}
+
+#ifdef UNIV_DEBUG
+ if (trx != 0 && id != 0) {
+ ut_ad(trx_is_interrupted(trx)
+ || buf_flush_get_dirty_pages_count(id) == 0);
+ }
+#endif /* UNIV_DEBUG */
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -790,7 +1192,7 @@ buf_LRU_check_size_of_non_data_objects(
buf_lru_switched_on_innodb_mon = TRUE;
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
}
} else if (buf_lru_switched_on_innodb_mon) {
@@ -938,7 +1340,7 @@ loop:
mon_value_was = srv_print_innodb_monitor;
started_monitor = TRUE;
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
}
/* If we have scanned the whole LRU and still are unable to
@@ -965,7 +1367,7 @@ loop:
++flush_failures;
}
- ++srv_buf_pool_wait_free;
+ srv_stats.buf_pool_wait_free.add(n_iterations, 1);
n_iterations++;
@@ -1107,6 +1509,7 @@ buf_LRU_remove_block(
buf_page_t* bpage) /*!< in: control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool);
ut_ad(bpage);
@@ -1142,6 +1545,9 @@ buf_LRU_remove_block(
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
buf_unzip_LRU_remove_block_if_needed(bpage);
/* If the LRU list is so short that LRU_old is not defined,
@@ -1202,7 +1608,10 @@ buf_unzip_LRU_add_block(
}
/******************************************************************//**
-Adds a block to the LRU list end. */
+Adds a block to the LRU list end. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INLINE
void
buf_LRU_add_block_to_end_low(
@@ -1221,6 +1630,8 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1249,7 +1660,10 @@ buf_LRU_add_block_to_end_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INLINE
void
buf_LRU_add_block_low(
@@ -1291,6 +1705,8 @@ buf_LRU_add_block_low(
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1318,7 +1734,10 @@ buf_LRU_add_block_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INTERN
void
buf_LRU_add_block(
@@ -1391,7 +1810,7 @@ buf_LRU_free_block(
bpage->offset);
rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
@@ -1540,6 +1959,8 @@ func_exit:
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
prev_b, b);
+ incr_LRU_size_in_bytes(b, buf_pool);
+
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
if (UNIV_UNLIKELY
@@ -1995,24 +2416,28 @@ buf_LRU_free_one_page(
be in a state where it can be freed; there
may or may not be a hash index to the page */
{
-#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ const ulint fold = buf_page_address_fold(bpage->space,
+ bpage->offset);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(mutex_own(block_mutex));
+
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(block_mutex);
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- } else {
- /* The block_mutex should have been released by
- buf_LRU_block_remove_hashed_page() when it returns
- BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool->zip_mutex);
- mutex_enter(block_mutex);
}
+
+ /* buf_LRU_block_remove_hashed_page() releases hash_lock and block_mutex */
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
+ && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!mutex_own(block_mutex));
}
/**********************************************************************//**
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 227cb083725..3a579e251ff 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -61,7 +61,7 @@ buf_read_page_handle_error(
buf_page_t* bpage) /*!< in: pointer to the block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ibool uncompressed = (buf_page_get_state(bpage)
+ const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
/* First unfix and release lock on the bpage */
@@ -79,13 +79,14 @@ buf_read_page_handle_error(
BUF_IO_READ);
}
+ mutex_exit(buf_page_get_mutex(bpage));
+
/* remove the block from LRU list */
buf_LRU_free_one_page(bpage);
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
}
@@ -103,7 +104,7 @@ static
ulint
buf_read_page_low(
/*==============*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
trying to read from a non-existent tablespace, or a
tablespace which is just now being dropped */
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -192,13 +193,9 @@ buf_read_page_low(
}
thd_wait_end(NULL);
- if (*err == DB_TABLESPACE_DELETED) {
- buf_read_page_handle_error(bpage);
- return(0);
- }
-
if (*err != DB_SUCCESS) {
- if (ignore_nonexistent_pages) {
+ if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
+ buf_read_page_handle_error(bpage);
return(0);
}
/* else */
@@ -248,7 +245,7 @@ buf_read_ahead_random(
ulint ibuf_mode;
ulint count;
ulint low, high;
- ulint err;
+ dberr_t err;
ulint i;
const ulint buf_read_ahead_random_area
= BUF_READ_AHEAD_AREA(buf_pool);
@@ -377,7 +374,7 @@ read_ahead:
buf_LRU_stat_inc_io();
buf_pool->stat.n_ra_pages_read_rnd += count;
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
return(count);
}
@@ -397,7 +394,7 @@ buf_read_page(
{
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
tablespace_version = fil_space_get_version(space);
@@ -407,7 +404,7 @@ buf_read_page(
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset);
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -440,7 +437,7 @@ buf_read_page_async(
ulint zip_size;
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
zip_size = fil_space_get_zip_size(space);
@@ -455,7 +452,7 @@ buf_read_page_async(
| BUF_READ_IGNORE_NONEXISTENT_PAGES,
space, zip_size, FALSE,
tablespace_version, offset);
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
/* We do not increment number of I/O operations used for LRU policy
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
@@ -513,7 +510,7 @@ buf_read_ahead_linear(
ulint fail_count;
ulint ibuf_mode;
ulint low, high;
- ulint err;
+ dberr_t err;
ulint i;
const ulint buf_read_ahead_linear_area
= BUF_READ_AHEAD_AREA(buf_pool);
@@ -784,7 +781,7 @@ buf_read_ibuf_merge_pages(
#endif
for (i = 0; i < n_stored; i++) {
- ulint err;
+ dberr_t err;
buf_pool_t* buf_pool;
ulint zip_size = fil_space_get_zip_size(space_ids[i]);
@@ -850,7 +847,7 @@ buf_read_recv_pages(
{
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
ulint i;
zip_size = fil_space_get_zip_size(space);
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 8e305364ac8..eea10759fcd 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -241,9 +241,10 @@ dict_hdr_create(
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
+dberr_t
dict_boot(void)
/*===========*/
{
@@ -252,7 +253,7 @@ dict_boot(void)
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
- ulint error;
+ dberr_t error;
/* Be sure these constants do not ever change. To avoid bloat,
only check the *NUM_FIELDS* in each table */
@@ -307,9 +308,7 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- /* If the format is UNIV_FORMAT_A, table->flags == 0, and
- TYPE == 1, which is defined as SYS_TABLE_TYPE_ANTELOPE.
- The low order bit of TYPE is always set to 1. If the format
+ /* The low order bit of TYPE is always set to 1. If the format
is UNIV_FORMAT_B or higher, this field matches table->flags. */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
@@ -454,14 +453,27 @@ dict_boot(void)
ibuf_init_at_db_start();
- /* Load definitions of other indexes on system tables */
+ dberr_t err = DB_SUCCESS;
+
+ if (srv_read_only_mode && !ibuf_is_empty()) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Change buffer must be empty when --innodb-read-only "
+ "is set!");
- dict_load_sys_table(dict_sys->sys_tables);
- dict_load_sys_table(dict_sys->sys_columns);
- dict_load_sys_table(dict_sys->sys_indexes);
- dict_load_sys_table(dict_sys->sys_fields);
+ err = DB_ERROR;
+ } else {
+ /* Load definitions of other indexes on system tables */
+
+ dict_load_sys_table(dict_sys->sys_tables);
+ dict_load_sys_table(dict_sys->sys_columns);
+ dict_load_sys_table(dict_sys->sys_indexes);
+ dict_load_sys_table(dict_sys->sys_fields);
+ }
mutex_exit(&(dict_sys->mutex));
+
+ return(err);
}
/*****************************************************************//**
@@ -476,9 +488,10 @@ dict_insert_initial_data(void)
}
/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
+dberr_t
dict_create(void)
/*=============*/
{
@@ -490,7 +503,11 @@ dict_create(void)
mtr_commit(&mtr);
- dict_boot();
+ dberr_t err = dict_boot();
+
+ if (err == DB_SUCCESS) {
+ dict_insert_initial_data();
+ }
- dict_insert_initial_data();
+ return(err);
}
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index d58b304ab92..864150b324a 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,6 +43,7 @@ Created 1/8/1996 Heikki Tuuri
#include "usr0sess.h"
#include "ut0vec.h"
#include "dict0priv.h"
+#include "fts0priv.h"
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
@@ -244,8 +245,8 @@ dict_create_sys_columns_tuple(
/***************************************************************//**
Builds a table definition to insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_build_table_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
@@ -253,9 +254,8 @@ dict_build_table_def_step(
{
dict_table_t* table;
dtuple_t* row;
- ulint error;
- const char* path_or_name;
- ibool is_path;
+ dberr_t error;
+ const char* path;
mtr_t mtr;
ulint space = 0;
bool use_tablespace;
@@ -263,7 +263,7 @@ dict_build_table_def_step(
ut_ad(mutex_own(&(dict_sys->mutex)));
table = node->table;
- use_tablespace = !!(table->flags2 & DICT_TF2_USE_TABLESPACE);
+ use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE);
dict_hdr_get_new_id(&table->id, NULL, NULL);
@@ -274,6 +274,11 @@ dict_build_table_def_step(
Get a new space id. */
dict_hdr_get_new_id(NULL, NULL, &space);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_out_of_space_ids",
+ space = ULINT_UNDEFINED;
+ );
+
if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) {
return(DB_ERROR);
}
@@ -286,26 +291,19 @@ dict_build_table_def_step(
- page 3 will contain the root of the clustered index of the
table we create here. */
- if (table->dir_path_of_temp_table) {
- /* We place tables created with CREATE TEMPORARY
- TABLE in the tmp dir of mysqld server */
-
- path_or_name = table->dir_path_of_temp_table;
- is_path = TRUE;
- } else {
- path_or_name = table->name;
- is_path = FALSE;
- }
+ path = table->data_dir_path ? table->data_dir_path
+ : table->dir_path_of_temp_table;
ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
ut_ad(!dict_table_zip_size(table)
|| dict_table_get_format(table) >= UNIV_FORMAT_B);
error = fil_create_new_single_table_tablespace(
- space, path_or_name, is_path,
+ space, table->name, path,
dict_tf_to_fsp_flags(table->flags),
table->flags2,
FIL_IBD_FILE_INITIAL_SIZE);
+
table->space = (unsigned int) space;
if (error != DB_SUCCESS) {
@@ -333,10 +331,9 @@ dict_build_table_def_step(
}
/***************************************************************//**
-Builds a column definition to insert.
-@return DB_SUCCESS */
+Builds a column definition to insert. */
static
-ulint
+void
dict_build_col_def_step(
/*====================*/
tab_node_t* node) /*!< in: table create node */
@@ -346,8 +343,6 @@ dict_build_col_def_step(
row = dict_create_sys_columns_tuple(node->table, node->col_no,
node->heap);
ins_node_set_new_row(node->col_def, row);
-
- return(DB_SUCCESS);
}
/*****************************************************************//**
@@ -571,8 +566,8 @@ dict_create_search_tuple(
/***************************************************************//**
Builds an index definition row to insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_build_index_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
@@ -595,7 +590,10 @@ dict_build_index_def_step(
return(DB_TABLE_NOT_FOUND);
}
- trx->table_id = table->id;
+ if (!trx->table_id) {
+ /* Record only the first table id. */
+ trx->table_id = table->id;
+ }
node->table = table;
@@ -616,15 +614,16 @@ dict_build_index_def_step(
/* Note that the index was created by this transaction. */
index->trx_id = trx->id;
+ ut_ad(table->def_trx_id <= trx->id);
+ table->def_trx_id = trx->id;
return(DB_SUCCESS);
}
/***************************************************************//**
-Builds a field definition row to insert.
-@return DB_SUCCESS */
+Builds a field definition row to insert. */
static
-ulint
+void
dict_build_field_def_step(
/*======================*/
ind_node_t* node) /*!< in: index create node */
@@ -637,15 +636,13 @@ dict_build_field_def_step(
row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);
ins_node_set_new_row(node->field_def, row);
-
- return(DB_SUCCESS);
}
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_create_index_tree_step(
/*========================*/
ind_node_t* node) /*!< in: index create node */
@@ -653,7 +650,6 @@ dict_create_index_tree_step(
dict_index_t* index;
dict_table_t* sys_indexes;
dtuple_t* search_tuple;
- ulint zip_size;
btr_pcur_t pcur;
mtr_t mtr;
@@ -682,25 +678,37 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- zip_size = dict_table_zip_size(index->table);
- node->page_no = btr_create(index->type, index->space, zip_size,
- index->id, index, &mtr);
- /* printf("Created a new index tree in space %lu root page %lu\n",
- index->space, node->page_no); */
+ dberr_t err = DB_SUCCESS;
+ ulint zip_size = dict_table_zip_size(index->table);
- page_rec_write_field(btr_pcur_get_rec(&pcur),
- DICT_FLD__SYS_INDEXES__PAGE_NO,
- node->page_no, &mtr);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ if (node->index->table->ibd_file_missing
+ || dict_table_is_discarded(node->index->table)) {
+
+ node->page_no = FIL_NULL;
+ } else {
+ node->page_no = btr_create(
+ index->type, index->space, zip_size,
+ index->id, index, &mtr);
- if (node->page_no == FIL_NULL) {
+ if (node->page_no == FIL_NULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ }
- return(DB_OUT_OF_FILE_SPACE);
+ DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
+ node->page_no = FIL_NULL;
+ err = DB_OUT_OF_FILE_SPACE; );
}
- return(DB_SUCCESS);
+ page_rec_write_field(
+ btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO,
+ node->page_no, &mtr);
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ return(err);
}
/*******************************************************************//**
@@ -883,7 +891,7 @@ create:
for (index = UT_LIST_GET_FIRST(table->indexes);
index;
index = UT_LIST_GET_NEXT(indexes, index)) {
- if (index->id == index_id) {
+ if (index->id == index_id && !(index->type & DICT_FTS)) {
root_page_no = btr_create(type, space, zip_size,
index_id, index, mtr);
index->page = (unsigned int) root_page_no;
@@ -910,7 +918,9 @@ tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap) /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit) /*!< in: true if the commit node should be
+ added to the query graph */
{
tab_node_t* node;
@@ -932,8 +942,12 @@ tab_create_graph_create(
heap);
node->col_def->common.parent = node;
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
+ if (commit) {
+ node->commit_node = trx_commit_node_create(heap);
+ node->commit_node->common.parent = node;
+ } else {
+ node->commit_node = 0;
+ }
return(node);
}
@@ -947,7 +961,9 @@ ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap) /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit) /*!< in: true if the commit node should be
+ added to the query graph */
{
ind_node_t* node;
@@ -970,8 +986,12 @@ ind_create_graph_create(
dict_sys->sys_fields, heap);
node->field_def->common.parent = node;
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
+ if (commit) {
+ node->commit_node = trx_commit_node_create(heap);
+ node->commit_node->common.parent = node;
+ } else {
+ node->commit_node = 0;
+ }
return(node);
}
@@ -986,7 +1006,7 @@ dict_create_table_step(
que_thr_t* thr) /*!< in: query thread */
{
tab_node_t* node;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
@@ -1025,12 +1045,7 @@ dict_create_table_step(
if (node->col_no < (node->table)->n_def) {
- err = dict_build_col_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
+ dict_build_col_def_step(node);
node->col_no++;
@@ -1063,7 +1078,7 @@ dict_create_table_step(
}
function_exit:
- trx->error_state = (enum db_err) err;
+ trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
@@ -1093,7 +1108,7 @@ dict_create_index_step(
que_thr_t* thr) /*!< in: query thread */
{
ind_node_t* node;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
@@ -1130,12 +1145,7 @@ dict_create_index_step(
if (node->field_no < (node->index)->n_fields) {
- err = dict_build_field_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
+ dict_build_field_def_step(node);
node->field_no++;
@@ -1172,7 +1182,37 @@ dict_create_index_step(
err = dict_create_index_tree_step(node);
+ DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
+ err = DB_OUT_OF_MEMORY;);
+
if (err != DB_SUCCESS) {
+ /* If this is a FTS index, we will need to remove
+ it from fts->cache->indexes list as well */
+ if ((node->index->type & DICT_FTS)
+ && node->table->fts) {
+ fts_index_cache_t* index_cache;
+
+ rw_lock_x_lock(
+ &node->table->fts->cache->init_lock);
+
+ index_cache = (fts_index_cache_t*)
+ fts_find_index_cache(
+ node->table->fts->cache,
+ node->index);
+
+ if (index_cache->words) {
+ rbt_free(index_cache->words);
+ index_cache->words = 0;
+ }
+
+ ib_vector_remove(
+ node->table->fts->cache->indexes,
+ *reinterpret_cast<void**>(index_cache));
+
+ rw_lock_x_unlock(
+ &node->table->fts->cache->init_lock);
+ }
+
dict_index_remove_from_cache(node->table, node->index);
node->index = NULL;
@@ -1180,6 +1220,11 @@ dict_create_index_step(
}
node->index->page = node->page_no;
+ /* These should have been set in
+ dict_build_index_def_step() and
+ dict_index_add_to_cache(). */
+ ut_ad(node->index->trx_id == trx->id);
+ ut_ad(node->index->table->def_trx_id == trx->id);
node->state = INDEX_COMMIT_WORK;
}
@@ -1197,7 +1242,7 @@ dict_create_index_step(
}
function_exit:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
@@ -1217,93 +1262,107 @@ function_exit:
}
/****************************************************************//**
-Check whether the system foreign key tables exist. Additionally, If
-they exist then move them to non-LRU end of the table LRU list.
-@return TRUE if they exist. */
+Check whether a system table exists. Additionally, if it exists,
+move it to the non-LRU end of the table LRU list. This is oly used
+for system tables that can be upgraded or added to an older database,
+which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
+SYS_DATAFILES.
+@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
+but is not current, DB_TABLE_NOT_FOUND if it does not exist*/
static
-ibool
-dict_check_sys_foreign_tables_exist(void)
-/*=====================================*/
+dberr_t
+dict_check_if_system_table_exists(
+/*==============================*/
+ const char* tablename, /*!< in: name of table */
+ ulint num_fields, /*!< in: number of fields */
+ ulint num_indexes) /*!< in: number of indexes */
{
- dict_table_t* sys_foreign;
- ibool exists = FALSE;
- dict_table_t* sys_foreign_cols;
+ dict_table_t* sys_table;
+ dberr_t error = DB_SUCCESS;
ut_a(srv_get_active_thread_type() == SRV_NONE);
mutex_enter(&dict_sys->mutex);
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
- sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
+ sys_table = dict_table_get_low(tablename);
- if (sys_foreign != NULL
- && sys_foreign_cols != NULL
- && UT_LIST_GET_LEN(sys_foreign->indexes) == 3
- && UT_LIST_GET_LEN(sys_foreign_cols->indexes) == 1) {
+ if (sys_table == NULL) {
+ error = DB_TABLE_NOT_FOUND;
- /* Foreign constraint system tables have already been
- created, and they are ok. Ensure that they can't be
- evicted from the table LRU cache. */
+ } else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes
+ || sys_table->n_cols != num_fields) {
+ error = DB_CORRUPTION;
- dict_table_move_from_lru_to_non_lru(sys_foreign);
- dict_table_move_from_lru_to_non_lru(sys_foreign_cols);
+ } else {
+ /* This table has already been created, and it is OK.
+ Ensure that it can't be evicted from the table LRU cache. */
- exists = TRUE;
+ dict_table_move_from_lru_to_non_lru(sys_table);
}
mutex_exit(&dict_sys->mutex);
- return(exists);
+ return(error);
}
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
+at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
{
trx_t* trx;
- ulint error;
- ibool success;
- ibool srv_file_per_table_backup;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_foreign_err;
+ dberr_t sys_foreign_cols_err;
ut_a(srv_get_active_thread_type() == SRV_NONE);
/* Note: The master thread has not been started at this point. */
- if (dict_check_sys_foreign_tables_exist()) {
+
+ sys_foreign_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+ sys_foreign_cols_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+
+ if (sys_foreign_err == DB_SUCCESS
+ && sys_foreign_cols_err == DB_SUCCESS) {
return(DB_SUCCESS);
}
trx = trx_allocate_for_mysql();
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
trx->op_info = "creating foreign key sys tables";
row_mysql_lock_data_dictionary(trx);
/* Check which incomplete table definition to drop. */
- if (dict_table_get_low("SYS_FOREIGN") != NULL) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN table\n");
+ if (sys_foreign_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_FOREIGN table.");
row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
}
- if (dict_table_get_low("SYS_FOREIGN_COLS") != NULL) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN_COLS table\n");
+ if (sys_foreign_cols_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_FOREIGN_COLS table.");
row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
}
- fprintf(stderr,
- "InnoDB: Creating foreign key constraint system tables\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Creating foreign key constraint system tables.");
/* NOTE: in dict_load_foreigns we use the fact that
there are 2 secondary indexes on SYS_FOREIGN, and they
@@ -1315,50 +1374,50 @@ dict_create_or_check_foreign_constraint_tables(void)
VARBINARY, like in other InnoDB system tables, to get a clean
design. */
- srv_file_per_table_backup = (ibool) srv_file_per_table;
+ srv_file_per_table_backup = srv_file_per_table;
/* We always want SYSTEM tables to be created inside the system
tablespace. */
srv_file_per_table = 0;
- error = que_eval_sql(NULL,
- "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
- " REF_NAME CHAR, N_COLS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN (ID);\n"
- "CREATE INDEX FOR_IND"
- " ON SYS_FOREIGN (FOR_NAME);\n"
- "CREATE INDEX REF_IND"
- " ON SYS_FOREIGN (REF_NAME);\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
- " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN_COLS (ID, POS);\n"
- "END;\n"
- , FALSE, trx);
-
- if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: error %lu in creation\n",
- (ulong) error);
-
- ut_a(error == DB_OUT_OF_FILE_SPACE
- || error == DB_TOO_MANY_CONCURRENT_TRXS);
-
- fprintf(stderr,
- "InnoDB: creation failed\n"
- "InnoDB: tablespace is full\n"
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN tables\n");
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
+ " REF_NAME CHAR, N_COLS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND"
+ " ON SYS_FOREIGN (ID);\n"
+ "CREATE INDEX FOR_IND"
+ " ON SYS_FOREIGN (FOR_NAME);\n"
+ "CREATE INDEX REF_IND"
+ " ON SYS_FOREIGN (REF_NAME);\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
+ " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND"
+ " ON SYS_FOREIGN_COLS (ID, POS);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS "
+ "has failed with error %lu. Tablespace is full. "
+ "Dropping incompletely created tables.",
+ (ulong) err);
+
+ ut_ad(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
- error = DB_MUST_GET_MORE_FILE_SPACE;
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
}
trx_commit_for_mysql(trx);
@@ -1367,28 +1426,31 @@ dict_create_or_check_foreign_constraint_tables(void)
trx_free_for_mysql(trx);
- if (error == DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint system tables"
- " created\n");
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Foreign key constraint system tables created");
}
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
+ sys_foreign_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+ ut_a(sys_foreign_err == DB_SUCCESS);
- success = dict_check_sys_foreign_tables_exist();
- ut_a(success);
-
- srv_file_per_table = (my_bool) srv_file_per_table_backup;
+ sys_foreign_cols_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+ ut_a(sys_foreign_cols_err == DB_SUCCESS);
- return(error);
+ return(err);
}
/****************************************************************//**
Evaluate the given foreign key SQL statement.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_foreign_eval_sql(
/*==================*/
pars_info_t* info, /*!< in: info struct, or NULL */
@@ -1397,8 +1459,8 @@ dict_foreign_eval_sql(
dict_foreign_t* foreign,/*!< in: foreign */
trx_t* trx) /*!< in: transaction */
{
- ulint error;
- FILE* ef = dict_foreign_err_file;
+ dberr_t error;
+ FILE* ef = dict_foreign_err_file;
error = que_eval_sql(info, sql, FALSE, trx);
@@ -1453,8 +1515,8 @@ dict_foreign_eval_sql(
Add a single foreign key field definition to the data dictionary tables in
the database.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
ulint field_nr, /*!< in: foreign field number */
@@ -1492,17 +1554,17 @@ databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
are given locally for this table, that is, the number is not global, as in
the old format constraints < 4.0.18 it used to be.
@return error code or DB_SUCCESS */
-static
-ulint
+UNIV_INTERN
+dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
ulint* id_nr, /*!< in/out: number to use in id generation;
incremented if used */
dict_table_t* table, /*!< in: table */
dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx) /*!< in/out: dictionary transaction */
{
- ulint error;
+ dberr_t error;
ulint i;
pars_info_t* info = pars_info_create();
@@ -1553,12 +1615,6 @@ dict_create_add_foreign_to_dictionary(
}
}
- trx->op_info = "committing foreign key definitions";
-
- trx_commit(trx);
-
- trx->op_info = "";
-
return(error);
}
@@ -1566,7 +1622,7 @@ dict_create_add_foreign_to_dictionary(
Adds foreign key definitions to data dictionary tables in the database.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
@@ -1582,7 +1638,7 @@ dict_create_add_foreigns_to_dictionary(
{
dict_foreign_t* foreign;
ulint number = start_id + 1;
- ulint error;
+ dberr_t error;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1607,5 +1663,188 @@ dict_create_add_foreigns_to_dictionary(
}
}
+ trx->op_info = "committing foreign key definitions";
+
+ trx_commit(trx);
+
+ trx->op_info = "";
+
return(DB_SUCCESS);
}
+
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void)
+/*=====================================*/
+{
+ trx_t* trx;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_tablespaces_err;
+ dberr_t sys_datafiles_err;
+
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
+
+ /* Note: The master thread has not been started at this point. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+
+ if (sys_tablespaces_err == DB_SUCCESS
+ && sys_datafiles_err == DB_SUCCESS) {
+ return(DB_SUCCESS);
+ }
+
+ trx = trx_allocate_for_mysql();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "creating tablepace and datafile sys tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Check which incomplete table definition to drop. */
+
+ if (sys_tablespaces_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_TABLESPACES table.");
+ row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE);
+ }
+
+ if (sys_datafiles_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_DATAFILES table.");
+
+ row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Creating tablespace and datafile system tables.");
+
+ /* We always want SYSTEM tables to be created inside the system
+ tablespace. */
+ srv_file_per_table_backup = srv_file_per_table;
+ srv_file_per_table = 0;
+
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE SYS_TABLESPACES(\n"
+ " SPACE INT, NAME CHAR, FLAGS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
+ " ON SYS_TABLESPACES (SPACE);\n"
+ "CREATE TABLE SYS_DATAFILES(\n"
+ " SPACE INT, PATH CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
+ " ON SYS_DATAFILES (SPACE);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creation of SYS_TABLESPACES and SYS_DATAFILES "
+ "has failed with error %lu. Tablespace is full. "
+ "Dropping incompletely created tables.",
+ (ulong) err);
+
+ ut_a(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+ row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE);
+ row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE);
+
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_free_for_mysql(trx);
+
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Tablespace and datafile system tables created.");
+ }
+
+ /* Note: The master thread has not been started at this point. */
+ /* Confirm and move to the non-LRU part of the table LRU list. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ ut_a(sys_tablespaces_err == DB_SUCCESS);
+
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+ ut_a(sys_datafiles_err == DB_SUCCESS);
+
+ return(err);
+}
+
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+ ulint space, /*!< in: tablespace id */
+ const char* name, /*!< in: tablespace name */
+ ulint flags, /*!< in: tablespace flags */
+ const char* path, /*!< in: tablespace path */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true then commit the
+ transaction */
+{
+ dberr_t error;
+
+ pars_info_t* info = pars_info_create();
+
+ ut_a(space > TRX_SYS_SPACE);
+
+ pars_info_add_int4_literal(info, "space", space);
+
+ pars_info_add_str_literal(info, "name", name);
+
+ pars_info_add_int4_literal(info, "flags", flags);
+
+ pars_info_add_str_literal(info, "path", path);
+
+ error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "INSERT INTO SYS_TABLESPACES VALUES"
+ "(:space, :name, :flags);\n"
+ "INSERT INTO SYS_DATAFILES VALUES"
+ "(:space, :path);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+
+ if (commit) {
+ trx->op_info = "committing tablespace and datafile definition";
+ trx_commit(trx);
+ }
+
+ trx->op_info = "";
+
+ return(error);
+}
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 8282dafda0c..8e111645880 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,6 +26,7 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.h"
#include "fts0fts.h"
+#include "fil0fil.h"
#ifdef UNIV_NONINL
#include "dict0dict.ic"
@@ -56,7 +58,6 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "rem0cmp.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "row0merge.h"
#include "m_ctype.h" /* my_isspace() */
#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */
#include "srv0mon.h"
@@ -64,6 +65,14 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "lock0lock.h"
#include "dict0priv.h"
#include "row0upd.h"
+#include "row0mysql.h"
+#include "row0merge.h"
+#include "row0log.h"
+#include "ut0ut.h" /* ut_format_name() */
+#include "m_string.h"
+#include "my_sys.h"
+#include "mysqld.h" /* system_charset_info */
+#include "strfunc.h" /* strconvert() */
#include <ctype.h>
@@ -77,17 +86,27 @@ backround operations purge, rollback, foreign key checks reserve this
in S-mode; we cannot trust that MySQL protects implicit or background
operations a table drop since MySQL does not know of them; therefore
we need this; NOTE: a transaction which reserves this must keep book
-on the mode in trx_struct::dict_operation_lock_mode */
+on the mode in trx_t::dict_operation_lock_mode */
UNIV_INTERN rw_lock_t dict_operation_lock;
+/** Percentage of compression failures that are allowed in a single
+round */
+UNIV_INTERN ulong zip_failure_threshold_pct = 5;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+UNIV_INTERN ulong zip_pad_max = 50;
+
/* Keys to register rwlocks and mutexes with performance schema */
#ifdef UNIV_PFS_RWLOCK
UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
+UNIV_INTERN mysql_pfs_key_t index_online_log_key;
UNIV_INTERN mysql_pfs_key_t dict_table_stats_latch_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key;
UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key;
UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key;
#endif /* UNIV_PFS_MUTEX */
@@ -157,13 +176,6 @@ dict_index_build_internal_fts(
dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of an FTS index */
/**********************************************************************//**
-Removes a foreign constraint struct from the dictionary cache. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign); /*!< in, own: foreign constraint */
-/**********************************************************************//**
Prints a column data. */
static
void
@@ -185,14 +197,6 @@ void
dict_field_print_low(
/*=================*/
const dict_field_t* field); /*!< in: field */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Frees a foreign key struct. */
-static
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign); /*!< in, own: foreign key struct */
/**********************************************************************//**
Removes an index from the dictionary cache. */
@@ -216,14 +220,14 @@ dict_table_remove_from_cache_low(
/**********************************************************************//**
Validate the dictionary table LRU list.
@return TRUE if validate OK */
-UNIV_INTERN
+static
ibool
dict_lru_validate(void);
/*===================*/
/**********************************************************************//**
Check if table is in the dictionary table LRU list.
@return TRUE if table found */
-UNIV_INTERN
+static
ibool
dict_lru_find_table(
/*================*/
@@ -239,11 +243,11 @@ dict_non_lru_find_table(
#endif /* UNIV_DEBUG */
/* Stream for storing detailed information about the latest foreign key
-and unique key errors */
+and unique key errors. Only created if !srv_read_only_mode */
UNIV_INTERN FILE* dict_foreign_err_file = NULL;
/* mutex protecting the foreign and unique error buffers */
-UNIV_INTERN mutex_t dict_foreign_err_mutex;
-#endif /* !UNIV_HOTBACKUP */
+UNIV_INTERN ib_mutex_t dict_foreign_err_mutex;
+
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
UNIV_INTERN
@@ -330,7 +334,7 @@ dict_mutex_exit_for_mysql(void)
/** Get the latch that protects the stats of a given table */
#define GET_TABLE_STATS_LATCH(table) \
- (&dict_table_stats_latches[ut_fold_ull(table->id) \
+ (&dict_table_stats_latches[ut_fold_ull((ib_uint64_t) table) \
% DICT_TABLE_STATS_LATCHES_SIZE])
/**********************************************************************//**
@@ -389,6 +393,75 @@ dict_table_stats_unlock(
}
}
+/**********************************************************************//**
+Try to drop any indexes after an aborted index creation.
+This can also be after a server kill during DROP INDEX. */
+static
+void
+dict_table_try_drop_aborted(
+/*========================*/
+ dict_table_t* table, /*!< in: table, or NULL if it
+ needs to be looked up again */
+ table_id_t table_id, /*!< in: table identifier */
+ ulint ref_count) /*!< in: expected table->n_ref_count */
+{
+ trx_t* trx;
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "try to drop any indexes after an aborted index creation";
+ row_mysql_lock_data_dictionary(trx);
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ if (table == NULL) {
+ table = dict_table_open_on_id_low(table_id);
+ } else {
+ ut_ad(table->id == table_id);
+ }
+
+ if (table && table->n_ref_count == ref_count && table->drop_aborted) {
+ /* Silence a debug assertion in row_merge_drop_indexes(). */
+ ut_d(table->n_ref_count++);
+ row_merge_drop_indexes(trx, table, TRUE);
+ ut_d(table->n_ref_count--);
+ ut_ad(table->n_ref_count == ref_count);
+ trx_commit_for_mysql(trx);
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_background(trx);
+}
+
+/**********************************************************************//**
+When opening a table,
+try to drop any indexes after an aborted index creation.
+Release the dict_sys->mutex. */
+static
+void
+dict_table_try_drop_aborted_and_mutex_exit(
+/*=======================================*/
+ dict_table_t* table, /*!< in: table (may be NULL) */
+ ibool try_drop) /*!< in: FALSE if should try to
+ drop indexes whose online creation
+ was aborted */
+{
+ if (try_drop
+ && table != NULL
+ && table->drop_aborted
+ && table->n_ref_count == 1
+ && dict_table_get_first_index(table)) {
+
+ /* Attempt to drop the indexes whose online creation
+ was aborted. */
+ table_id_t table_id = table->id;
+
+ mutex_exit(&dict_sys->mutex);
+
+ dict_table_try_drop_aborted(table, table_id, 1);
+ } else {
+ mutex_exit(&dict_sys->mutex);
+ }
+}
+
/********************************************************************//**
Decrements the count of open handles to a table. */
UNIV_INTERN
@@ -396,7 +469,10 @@ void
dict_table_close(
/*=============*/
dict_table_t* table, /*!< in/out: table */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
{
if (!dict_locked) {
mutex_enter(&dict_sys->mutex);
@@ -407,6 +483,18 @@ dict_table_close(
--table->n_ref_count;
+ /* Force persistent stats re-read upon next open of the table
+ so that FLUSH TABLE can be used to forcibly fetch stats from disk
+ if they have been manually modified. We reset table->stat_initialized
+ only if table reference count is 0 because we do not want too frequent
+ stats re-reads (e.g. in other cases than FLUSH TABLE). */
+ if (strchr(table->name, '/') != NULL
+ && table->n_ref_count == 0
+ && dict_stats_is_persistent_enabled(table)) {
+
+ dict_stats_deinit(table);
+ }
+
MONITOR_DEC(MONITOR_TABLE_REFERENCE);
ut_ad(dict_lru_validate());
@@ -420,7 +508,19 @@ dict_table_close(
#endif /* UNIV_DEBUG */
if (!dict_locked) {
+ table_id_t table_id = table->id;
+ ibool drop_aborted;
+
+ drop_aborted = try_drop
+ && table->drop_aborted
+ && table->n_ref_count == 1
+ && dict_table_get_first_index(table);
+
mutex_exit(&dict_sys->mutex);
+
+ if (drop_aborted) {
+ dict_table_try_drop_aborted(NULL, table_id, 0);
+ }
}
}
#endif /* !UNIV_HOTBACKUP */
@@ -550,33 +650,6 @@ dict_table_autoinc_unlock(
{
mutex_exit(&table->autoinc_mutex);
}
-
-/**********************************************************************//**
-Looks for an index with the given table and index id.
-Note: Does not reserve the dictionary mutex.
-@return index or NULL if not found in cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_on_id_low(
-/*=====================*/
- dict_table_t* table, /*!< in: table */
- index_id_t id) /*!< in: index id */
-{
- dict_index_t* index;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (id == index->id) {
- /* Found */
-
- return(index);
- }
- }
-
- return(NULL);
-}
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
@@ -712,7 +785,10 @@ dict_table_t*
dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
{
dict_table_t* table;
@@ -736,7 +812,7 @@ dict_table_open_on_id(
}
if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
+ dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
}
return(table);
@@ -815,11 +891,13 @@ dict_init(void)
rw_lock_create(dict_operation_lock_key,
&dict_operation_lock, SYNC_DICT_OPERATION);
- dict_foreign_err_file = os_file_create_tmpfile();
- ut_a(dict_foreign_err_file);
+ if (!srv_read_only_mode) {
+ dict_foreign_err_file = os_file_create_tmpfile();
+ ut_a(dict_foreign_err_file);
- mutex_create(dict_foreign_err_mutex_key,
- &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(dict_foreign_err_mutex_key,
+ &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
+ }
for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) {
rw_lock_create(dict_table_stats_latch_key,
@@ -849,14 +927,20 @@ dict_move_to_mru(
}
/**********************************************************************//**
-Returns a table object and increments its open handle count.
+Returns a table object and increment its open handle count.
+NOTE! This is a high-level function to be used mainly from outside the
+'dict' module. Inside this directory dict_table_get_low
+is usually the appropriate function.
@return table, NULL if does not exist */
-static
+UNIV_INTERN
dict_table_t*
-dict_table_open_on_name_low(
-/*========================*/
+dict_table_open_on_name(
+/*====================*/
const char* table_name, /*!< in: table name */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
dict_err_ignore_t
ignore_err) /*!< in: error to be ignored when
loading a table definition */
@@ -915,61 +999,11 @@ dict_table_open_on_name_low(
ut_ad(dict_lru_validate());
if (!dict_locked) {
- mutex_exit(&(dict_sys->mutex));
+ dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
}
return(table);
}
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low
-is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name(
-/*====================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
-{
- dict_table_t* table;
-
- table = dict_table_open_on_name_low(table_name, dict_locked,
- DICT_ERR_IGNORE_NONE);
-
- if (table != NULL) {
- /* If table->ibd_file_missing == TRUE, this will
- print an error message and return without doing
- anything. */
- dict_stats_update(table,
- DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY,
- dict_locked);
- }
-
- return(table);
-}
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count. Table
-statistics will not be updated if they are not initialized.
-Call this function when dropping a table.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name_no_stats(
-/*=============================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored during
- table open */
-{
- return(dict_table_open_on_name_low(table_name, dict_locked,
- ignore_err));
-}
-
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -1156,7 +1190,7 @@ dict_table_can_be_evicted(
index != NULL;
index = dict_table_get_next_index(index)) {
- btr_search_t* info = index->search_info;
+ btr_search_t* info = btr_search_get_info(index);
/* We are not allowed to free the in-memory index
struct dict_index_t until all entries in the adaptive
@@ -1358,7 +1392,7 @@ dict_index_find_on_id_low(
Renames a table object.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
@@ -1372,7 +1406,6 @@ dict_table_rename_in_cache(
ulint fold;
char old_name[MAX_FULL_NAME_LEN + 1];
- ut_ad(table);
ut_ad(mutex_own(&(dict_sys->mutex)));
/* store the old/current name to an automatic variable */
@@ -1389,28 +1422,59 @@ dict_table_rename_in_cache(
fold = ut_fold_string(new_name);
/* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- (ut_strcmp(table2->name, new_name) == 0));
- if (UNIV_LIKELY_NULL(table2)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: dictionary cache"
- " already contains a table ", stderr);
- ut_print_name(stderr, NULL, TRUE, new_name);
- fputs("\n"
- "InnoDB: cannot rename table ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- putc('\n', stderr);
- return(FALSE);
- }
+ dict_table_t* table2;
+ HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ (ut_strcmp(table2->name, new_name) == 0));
+ DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure",
+ if (table2 == NULL) {
+ table2 = (dict_table_t*) -1;
+ } );
+ if (table2) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot rename table '%s' to '%s' since the "
+ "dictionary cache already contains '%s'.",
+ old_name, new_name, new_name);
+ return(DB_ERROR);
}
/* If the table is stored in a single-table tablespace, rename the
- .ibd file */
+ .ibd file and rebuild the .isl file if needed. */
+
+ if (dict_table_is_discarded(table)) {
+ os_file_type_t type;
+ ibool exists;
+ char* filepath;
+
+ ut_ad(table->space != TRX_SYS_SPACE);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ fil_delete_tablespace(table->space, BUF_REMOVE_FLUSH_NO_WRITE);
+
+ /* Delete any temp file hanging around. */
+ if (os_file_status(filepath, &exists, &type)
+ && exists
+ && !os_file_delete_if_exists(filepath)) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Delete of %s failed.", filepath);
+ }
+
+ mem_free(filepath);
+
+ } else if (table->space != TRX_SYS_SPACE) {
+ char* new_path = NULL;
- if (table->space != 0) {
if (table->dir_path_of_temp_table != NULL) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: trying to rename a"
@@ -1420,10 +1484,40 @@ dict_table_rename_in_cache(
ut_print_filename(stderr,
table->dir_path_of_temp_table);
fputs(" )\n", stderr);
- return(FALSE);
- } else if (!fil_rename_tablespace(old_name, table->space,
- new_name)) {
- return(FALSE);
+ return(DB_ERROR);
+
+ } else if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ char* old_path;
+
+ old_path = fil_space_get_first_path(table->space);
+
+ new_path = os_file_make_new_pathname(
+ old_path, new_name);
+
+ mem_free(old_path);
+
+ dberr_t err = fil_create_link_file(
+ new_name, new_path);
+
+ if (err != DB_SUCCESS) {
+ mem_free(new_path);
+ return(DB_TABLESPACE_EXISTS);
+ }
+ }
+
+ ibool success = fil_rename_tablespace(
+ old_name, table->space, new_name, new_path);
+
+ /* If the tablespace is remote, a new .isl file was created
+ If success, delete the old one. If not, delete the new one. */
+ if (new_path) {
+
+ mem_free(new_path);
+ fil_delete_link_file(success ? old_name : new_name);
+ }
+
+ if (!success) {
+ return(DB_ERROR);
}
}
@@ -1450,12 +1544,11 @@ dict_table_rename_in_cache(
ut_a(dict_sys->size > 0);
/* Update the table_name field in indexes */
- index = dict_table_get_first_index(table);
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
- while (index != NULL) {
index->table_name = table->name;
-
- index = dict_table_get_next_index(index);
}
if (!rename_also_foreigns) {
@@ -1490,7 +1583,7 @@ dict_table_rename_in_cache(
UT_LIST_INIT(table->referenced_list);
- return(TRUE);
+ return(DB_SUCCESS);
}
/* Update the table name fields in foreign constraints, and update also
@@ -1571,9 +1664,10 @@ dict_table_rename_in_cache(
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign != NULL) {
if (ut_strlen(foreign->referenced_table_name)
< ut_strlen(table->name)) {
/* Allocate a longer name buffer;
@@ -1581,16 +1675,19 @@ dict_table_rename_in_cache(
foreign->referenced_table_name = mem_heap_strdup(
foreign->heap, table->name);
- dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
+
+ dict_mem_referenced_table_name_lookup_set(
+ foreign, TRUE);
} else {
/* Use the same buffer */
strcpy(foreign->referenced_table_name, table->name);
- dict_mem_referenced_table_name_lookup_set(foreign, FALSE);
+
+ dict_mem_referenced_table_name_lookup_set(
+ foreign, FALSE);
}
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- return(TRUE);
+ return(DB_SUCCESS);
}
/**********************************************************************//**
@@ -1692,6 +1789,30 @@ dict_table_remove_from_cache_low(
ut_ad(dict_lru_validate());
+ if (lru_evict && table->drop_aborted) {
+ /* Do as dict_table_try_drop_aborted() does. */
+
+ trx_t* trx = trx_allocate_for_background();
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ /* Mimic row_mysql_lock_data_dictionary(). */
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ /* Silence a debug assertion in row_merge_drop_indexes(). */
+ ut_d(table->n_ref_count++);
+ row_merge_drop_indexes(trx, table, TRUE);
+ ut_d(table->n_ref_count--);
+ ut_ad(table->n_ref_count == 0);
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+ }
+
size = mem_heap_get_size(table->heap) + strlen(table->name) + 1;
ut_ad(dict_sys->size >= size);
@@ -1777,6 +1898,12 @@ dict_index_too_big_for_undo(
+ 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
+ 2/* pointer to previous undo log record */;
+ /* FTS index consists of auxiliary tables, they shall be excluded from
+ index row size check */
+ if (new_index->type & DICT_FTS) {
+ return(false);
+ }
+
if (!clust_index) {
ut_a(dict_index_is_clust(new_index));
clust_index = new_index;
@@ -1900,6 +2027,12 @@ dict_index_too_big_for_tree(
/* maximum allowed size of a node pointer record */
ulint page_ptr_max;
+ /* FTS index consists of auxiliary tables, they shall be excluded from
+ index row size check */
+ if (new_index->type & DICT_FTS) {
+ return(false);
+ }
+
comp = dict_table_is_comp(table);
zip_size = dict_table_zip_size(table);
@@ -2032,7 +2165,7 @@ add_field_size:
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
-ulint
+dberr_t
dict_index_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table on which the index is */
@@ -2051,6 +2184,7 @@ dict_index_add_to_cache(
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(index->n_def == index->n_fields);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(!dict_index_is_online_ddl(index));
ut_ad(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
@@ -2077,6 +2211,7 @@ dict_index_add_to_cache(
number of fields in the cache internal representation */
new_index->n_fields = new_index->n_def;
+ new_index->trx_id = index->trx_id;
if (strict && dict_index_too_big_for_tree(table, new_index)) {
too_big:
@@ -2169,51 +2304,41 @@ undo_size_ok:
}
}
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
-
- new_index->search_info = btr_search_info_create(new_index->heap);
-
- new_index->stat_index_size = 1;
- new_index->stat_n_leaf_pages = 1;
-
- new_index->page = page_no;
- rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
- dict_index_is_ibuf(index)
- ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
-
if (!dict_index_is_univ(new_index)) {
new_index->stat_n_diff_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_alloc(
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_diff_key_vals)));
new_index->stat_n_sample_sizes =
- static_cast<ib_uint64_t*>(mem_heap_alloc(
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_sample_sizes)));
new_index->stat_n_non_null_key_vals =
static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_non_null_key_vals)));
+ }
- /* Give some sensible values to stat_n_... in case we do
- not calculate statistics quickly enough */
+ new_index->stat_index_size = 1;
+ new_index->stat_n_leaf_pages = 1;
- for (i = 0; i <= dict_index_get_n_unique(new_index); i++) {
+ /* Add the new index as the last index for the table */
- new_index->stat_n_diff_key_vals[i] = 100;
- new_index->stat_n_sample_sizes[i] = 0;
- }
- }
+ UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
+ new_index->table = table;
+ new_index->table_name = table->name;
+ new_index->search_info = btr_search_info_create(new_index->heap);
+
+ new_index->page = page_no;
+ rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
+ dict_index_is_ibuf(index)
+ ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
dict_sys->size += mem_heap_get_size(new_index->heap);
@@ -2242,9 +2367,17 @@ dict_index_remove_from_cache_low(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(mutex_own(&(dict_sys->mutex)));
+ /* No need to acquire the dict_index_t::lock here because
+ there can't be any active operations on this index (or table). */
+
+ if (index->online_log) {
+ ut_ad(index->online_status == ONLINE_INDEX_CREATION);
+ row_log_free(index->online_log);
+ }
+
/* We always create search info whether or not adaptive
hash index is enabled or not. */
- info = index->search_info;
+ info = btr_search_get_info(index);
ut_ad(info);
/* We are not allowed to free the in-memory index struct
@@ -2270,15 +2403,15 @@ dict_index_remove_from_cache_low(
if (retries % 500 == 0) {
/* No luck after 5 seconds of wait. */
fprintf(stderr, "InnoDB: Error: Waited for"
- " %lu secs for hash index"
- " ref_count (%lu) to drop"
- " to 0.\n"
- "index: \"%s\""
- " table: \"%s\"\n",
- retries/100,
- ref_count,
- index->name,
- table->name);
+ " %lu secs for hash index"
+ " ref_count (%lu) to drop"
+ " to 0.\n"
+ "index: \"%s\""
+ " table: \"%s\"\n",
+ retries/100,
+ ref_count,
+ index->name,
+ table->name);
}
/* To avoid a hang here we commit suicide if the
@@ -2821,8 +2954,6 @@ dict_index_build_internal_fts(
return(new_index);
}
-
-#ifndef UNIV_HOTBACKUP
/*====================== FOREIGN KEY PROCESSING ========================*/
/*********************************************************************//**
@@ -2889,8 +3020,7 @@ dict_table_get_foreign_constraint(
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- if (foreign->foreign_index == index
- || foreign->referenced_index == index) {
+ if (foreign->foreign_index == index) {
return(foreign);
}
@@ -2901,7 +3031,7 @@ dict_table_get_foreign_constraint(
/*********************************************************************//**
Frees a foreign key struct. */
-static
+UNIV_INTERN
void
dict_foreign_free(
/*==============*/
@@ -2912,7 +3042,7 @@ dict_foreign_free(
/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
-static
+UNIV_INTERN
void
dict_foreign_remove_from_cache(
/*===========================*/
@@ -2976,84 +3106,50 @@ dict_foreign_find(
return(NULL);
}
+
/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
@return matching index, NULL if not found */
-static
+UNIV_INTERN
dict_index_t*
dict_foreign_find_index(
/*====================*/
- dict_table_t* table, /*!< in: table */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
- column types must match */
- ibool check_charsets,
- /*!< in: whether to check charsets.
- only has an effect if types_idx != NULL */
- ulint check_null)
- /*!< in: nonzero if none of the columns must
- be declared NOT NULL */
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
{
dict_index_t* index;
+ ut_ad(mutex_own(&dict_sys->mutex));
+
index = dict_table_get_first_index(table);
while (index != NULL) {
/* Ignore matches that refer to the same instance
- or the index is to be dropped */
- if (index->to_be_dropped || types_idx == index
- || index->type & DICT_FTS) {
+ (or the index is to be dropped) */
+ if (types_idx == index || index->type & DICT_FTS
+ || index->to_be_dropped) {
goto next_rec;
- } else if (dict_index_get_n_fields(index) >= n_cols) {
- ulint i;
-
- for (i = 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
-
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
-
- if (field->prefix_len != 0) {
- /* We do not accept column prefix
- indexes here */
-
- break;
- }
-
- if (0 != innobase_strcasecmp(columns[i],
- col_name)) {
- break;
- }
-
- if (check_null
- && (field->col->prtype & DATA_NOT_NULL)) {
-
- return(NULL);
- }
-
- if (types_idx && !cmp_cols_are_equal(
- dict_index_get_nth_col(index, i),
- dict_index_get_nth_col(types_idx,
- i),
- check_charsets)) {
-
- break;
- }
- }
-
- if (i == n_cols) {
- /* We found a matching index */
-
- return(index);
- }
+ } else if (dict_foreign_qualify_index(
+ table, columns, n_cols, index, types_idx,
+ check_charsets, check_null)) {
+ return(index);
}
next_rec:
@@ -3064,90 +3160,6 @@ next_rec:
}
/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign)/*!< in: foreign key */
-{
- ut_a(foreign != NULL);
-
- /* Try to find an index which contains the columns as the
- first fields and in the right order, and the types are the
- same as in foreign->foreign_index */
-
- return(dict_foreign_find_index(
- foreign->foreign_table,
- foreign->foreign_col_names, foreign->n_fields,
- foreign->foreign_index, TRUE, /* check types */
- FALSE/* allow columns to be NULL */));
-}
-
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols) /*!< in: number of columns */
-{
- dict_index_t* index;
- dict_index_t* found;
-
- found = NULL;
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0
- && dict_index_get_n_ordering_defined_by_user(index)
- == n_cols) {
-
- ulint i;
-
- for (i = 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
-
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
-
- if (0 != innobase_strcasecmp(
- columns[i], col_name)) {
-
- break;
- }
- }
-
- if (i == n_cols) {
- /* We found a matching index, select
- the index with the higher id*/
-
- if (!found || index->id > found->id) {
-
- found = index;
- }
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(found);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
Report an error in a foreign key definition. */
static
void
@@ -3196,7 +3208,7 @@ At least one of the foreign table and the referenced table must already
be in the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_add_to_cache(
/*======================*/
dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
@@ -3325,7 +3337,6 @@ dict_foreign_add_to_cache(
return(DB_SUCCESS);
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity, and
@@ -3579,6 +3590,67 @@ dict_scan_col(
return(ptr);
}
+
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len, /*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap) /*!< in/out: heap memory */
+{
+ char* ref;
+ const char* db_name;
+
+ if (!database_name) {
+ /* Use the database name of the foreign key table */
+
+ db_name = name;
+ database_name_len = dict_get_db_name_len(name);
+ } else {
+ db_name = database_name;
+ }
+
+ /* Copy database_name, '/', table_name, '\0' */
+ ref = static_cast<char*>(
+ mem_heap_alloc(heap, database_name_len + table_name_len + 2));
+
+ memcpy(ref, db_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+
+ /* Values; 0 = Store and compare as given; case sensitive
+ 1 = Store and compare in lower; case insensitive
+ 2 = Store as given, compare in lower; case semi-sensitive */
+ if (innobase_get_lower_case_table_names() == 2) {
+ innobase_casedn_str(ref);
+ *table = dict_table_get_low(ref);
+ memcpy(ref, db_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+
+ } else {
+#ifndef __WIN__
+ if (innobase_get_lower_case_table_names() == 1) {
+ innobase_casedn_str(ref);
+ }
+#else
+ innobase_casedn_str(ref);
+#endif /* !__WIN__ */
+ *table = dict_table_get_low(ref);
+ }
+
+ return(ref);
+}
/*********************************************************************//**
Scans a table name from an SQL string.
@return scanned to */
@@ -3598,9 +3670,7 @@ dict_scan_table_name(
const char* database_name = NULL;
ulint database_name_len = 0;
const char* table_name = NULL;
- ulint table_name_len;
const char* scan_name;
- char* ref;
*success = FALSE;
*table = NULL;
@@ -3648,46 +3718,11 @@ dict_scan_table_name(
table_name = scan_name;
}
- if (database_name == NULL) {
- /* Use the database name of the foreign key table */
-
- database_name = name;
- database_name_len = dict_get_db_name_len(name);
- }
-
- table_name_len = strlen(table_name);
-
- /* Copy database_name, '/', table_name, '\0' */
- ref = static_cast<char*>(
- mem_heap_alloc(heap, database_name_len + table_name_len + 2));
-
- memcpy(ref, database_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- /* Values; 0 = Store and compare as given; case sensitive
- 1 = Store and compare in lower; case insensitive
- 2 = Store as given, compare in lower; case semi-sensitive */
- if (innobase_get_lower_case_table_names() == 2) {
- innobase_casedn_str(ref);
- *table = dict_table_get_low(ref);
- memcpy(ref, database_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- } else {
-#ifndef __WIN__
- if (innobase_get_lower_case_table_names() == 1) {
- innobase_casedn_str(ref);
- }
-#else
- innobase_casedn_str(ref);
-#endif /* !__WIN__ */
- *table = dict_table_get_low(ref);
- }
+ *ref_name = dict_get_referenced_table(
+ name, database_name, database_name_len,
+ table_name, strlen(table_name), table, heap);
*success = TRUE;
- *ref_name = ref;
return(ptr);
}
@@ -3810,13 +3845,12 @@ end_of_string:
}
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Finds the highest [number] for foreign key constraints of the table. Looks
only at the >= 4.0.18-format id's, which are of the form
databasename/tablename_ibfk_[number].
@return highest number, 0 if table has no new format foreign key constraints */
-static
+UNIV_INTERN
ulint
dict_table_get_highest_foreign_id(
/*==============================*/
@@ -3871,6 +3905,8 @@ dict_foreign_report_syntax_err(
in the SQL string */
const char* ptr) /*!< in: place of the syntax error */
{
+ ut_ad(!srv_read_only_mode);
+
FILE* ef = dict_foreign_err_file;
mutex_enter(&dict_foreign_err_mutex);
@@ -3888,7 +3924,7 @@ be accompanied with indexes in both participating tables. The indexes are
allowed to contain more fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
static
-ulint
+dberr_t
dict_create_foreign_constraints_low(
/*================================*/
trx_t* trx, /*!< in: transaction */
@@ -3919,7 +3955,7 @@ dict_create_foreign_constraints_low(
FILE* ef = dict_foreign_err_file;
const char* constraint_name;
ibool success;
- ulint error;
+ dberr_t error;
const char* ptr1;
const char* ptr2;
ulint i;
@@ -3931,6 +3967,7 @@ dict_create_foreign_constraints_low(
const char* column_names[500];
const char* referenced_table_name;
+ ut_ad(!srv_read_only_mode);
ut_ad(mutex_own(&(dict_sys->mutex)));
table = dict_table_get_low(name);
@@ -4470,11 +4507,11 @@ UNIV_INTERN
ibool
dict_str_starts_with_keyword(
/*=========================*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
const char* str, /*!< in: string to scan for keyword */
const char* keyword) /*!< in: keyword to look for */
{
- struct charset_info_st* cs = innobase_get_charset(mysql_thd);
+ struct charset_info_st* cs = innobase_get_charset(thd);
ibool success;
dict_accept(cs, str, keyword, &success);
@@ -4489,7 +4526,7 @@ be accompanied with indexes in both participating tables. The indexes are
allowed to contain more fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_foreign_constraints(
/*============================*/
trx_t* trx, /*!< in: transaction */
@@ -4509,9 +4546,9 @@ dict_create_foreign_constraints(
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
- char* str;
- ulint err;
- mem_heap_t* heap;
+ char* str;
+ dberr_t err;
+ mem_heap_t* heap;
ut_a(trx);
ut_a(trx->mysql_thd);
@@ -4534,7 +4571,7 @@ Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
mem_heap_t* heap, /*!< in: heap from which we can
@@ -4552,7 +4589,6 @@ dict_foreign_parse_drop_constraints(
size_t len;
const char* ptr;
const char* id;
- FILE* ef = dict_foreign_err_file;
struct charset_info_st* cs;
ut_a(trx);
@@ -4618,10 +4654,11 @@ loop:
foreign = UT_LIST_GET_FIRST(table->foreign_list);
while (foreign != NULL) {
- if (0 == strcmp(foreign->id, id)
+ if (0 == innobase_strcasecmp(foreign->id, id)
|| (strchr(foreign->id, '/')
- && 0 == strcmp(id,
- dict_remove_db_name(foreign->id)))) {
+ && 0 == innobase_strcasecmp(
+ id,
+ dict_remove_db_name(foreign->id)))) {
/* Found */
break;
}
@@ -4629,20 +4666,26 @@ loop:
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
+
if (foreign == NULL) {
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Error in dropping of a foreign key constraint"
- " of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fputs(",\n"
- "in SQL command\n", ef);
- fputs(str, ef);
- fputs("\nCannot find a constraint with the given id ", ef);
- ut_print_name(ef, NULL, FALSE, id);
- fputs(".\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
+
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ mutex_enter(&dict_foreign_err_mutex);
+ rewind(ef);
+ ut_print_timestamp(ef);
+ fputs(" Error in dropping of a foreign key "
+ "constraint of table ", ef);
+ ut_print_name(ef, NULL, TRUE, table->name);
+ fputs(",\nin SQL command\n", ef);
+ fputs(str, ef);
+ fputs("\nCannot find a constraint with the "
+ "given id ", ef);
+ ut_print_name(ef, NULL, FALSE, id);
+ fputs(".\n", ef);
+ mutex_exit(&dict_foreign_err_mutex);
+ }
mem_free(str);
@@ -4652,15 +4695,19 @@ loop:
goto loop;
syntax_error:
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Syntax error in dropping of a"
- " foreign key constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fprintf(ef, ",\n"
- "close to:\n%s\n in SQL command\n%s\n", ptr, str);
- mutex_exit(&dict_foreign_err_mutex);
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ mutex_enter(&dict_foreign_err_mutex);
+ rewind(ef);
+ ut_print_timestamp(ef);
+ fputs(" Syntax error in dropping of a"
+ " foreign key constraint of table ", ef);
+ ut_print_name(ef, NULL, TRUE, table->name);
+ fprintf(ef, ",\n"
+ "close to:\n%s\n in SQL command\n%s\n", ptr, str);
+ mutex_exit(&dict_foreign_err_mutex);
+ }
mem_free(str);
@@ -4668,7 +4715,7 @@ syntax_error:
}
/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-#endif /* !UNIV_HOTBACKUP */
+
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
Assumes that dict_sys->mutex is already being held.
@@ -4908,7 +4955,6 @@ dict_index_calc_min_rec_len(
return(sum);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Prints info of a foreign key constraint. */
static
@@ -4939,7 +4985,6 @@ dict_foreign_print_low(
fputs(" )\n", stderr);
}
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Prints a table data. */
UNIV_INTERN
@@ -4948,60 +4993,29 @@ dict_table_print(
/*=============*/
dict_table_t* table) /*!< in: table */
{
- mutex_enter(&(dict_sys->mutex));
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name) /*!< in: table name */
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(name);
-
- ut_a(table);
-
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table) /*!< in: table */
-{
dict_index_t* index;
dict_foreign_t* foreign;
ulint i;
ut_ad(mutex_own(&(dict_sys->mutex)));
- dict_stats_update(table, DICT_STATS_FETCH, TRUE);
+ dict_table_stats_lock(table, RW_X_LATCH);
- dict_table_stats_lock(table, RW_S_LATCH);
+ if (!table->stat_initialized) {
+ dict_stats_update_transient(table);
+ }
fprintf(stderr,
"--------------------------------------\n"
"TABLE: name %s, id %llu, flags %lx, columns %lu,"
- " indexes %lu, appr.rows %lu\n"
+ " indexes %lu, appr.rows " UINT64PF "\n"
" COLUMNS: ",
table->name,
(ullint) table->id,
(ulong) table->flags,
(ulong) table->n_cols,
(ulong) UT_LIST_GET_LEN(table->indexes),
- (ulong) table->stat_n_rows);
+ table->stat_n_rows);
for (i = 0; i < (ulint) table->n_cols; i++) {
dict_col_print_low(table, dict_table_get_nth_col(table, i));
@@ -5017,7 +5031,9 @@ dict_table_print_low(
index = UT_LIST_GET_NEXT(indexes, index);
}
- dict_table_stats_unlock(table, RW_S_LATCH);
+ table->stat_initialized = FALSE;
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
foreign = UT_LIST_GET_FIRST(table->foreign_list);
@@ -5065,13 +5081,15 @@ dict_index_print_low(
ib_int64_t n_vals;
ulint i;
+ ut_a(index->table->stat_initialized);
+
ut_ad(mutex_own(&(dict_sys->mutex)));
if (index->n_user_defined_cols > 0) {
n_vals = index->stat_n_diff_key_vals[
- index->n_user_defined_cols];
+ index->n_user_defined_cols - 1];
} else {
- n_vals = index->stat_n_diff_key_vals[1];
+ n_vals = index->stat_n_diff_key_vals[0];
}
fprintf(stderr,
@@ -5121,7 +5139,6 @@ dict_field_print_low(
}
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
@@ -5310,7 +5327,6 @@ dict_print_info_on_foreign_keys(
mutex_exit(&(dict_sys->mutex));
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Displays the names of the index and the table. */
UNIV_INTERN
@@ -5318,7 +5334,7 @@ void
dict_index_name_print(
/*==================*/
FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to print */
{
fputs("index ", file);
@@ -5393,7 +5409,9 @@ UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
- dict_index_t* index) /*!< in/out: index */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context */
{
mem_heap_t* heap;
mtr_t mtr;
@@ -5401,8 +5419,14 @@ dict_set_corrupted(
dtuple_t* tuple;
dfield_t* dfield;
byte* buf;
+ char* table_name;
const char* status;
btr_cur_t cursor;
+ bool locked = RW_X_LATCH == trx->dict_operation_lock_mode;
+
+ if (!locked) {
+ row_mysql_lock_data_dictionary(trx);
+ }
ut_ad(index);
ut_ad(mutex_own(&dict_sys->mutex));
@@ -5422,7 +5446,7 @@ dict_set_corrupted(
if (index->type & DICT_CORRUPT) {
/* The index was already flagged corrupted. */
ut_ad(!dict_index_is_clust(index) || index->table->corrupted);
- return;
+ goto func_exit;
}
heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
@@ -5463,19 +5487,29 @@ dict_set_corrupted(
goto fail;
}
mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr);
- status = " InnoDB: Flagged corruption of ";
+ status = "Flagged";
} else {
fail:
- status = " InnoDB: Unable to flag corruption of ";
+ status = "Unable to flag";
}
mtr_commit(&mtr);
+ mem_heap_empty(heap);
+ table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1));
+ *innobase_convert_name(
+ table_name, FN_REFLEN,
+ index->table_name, strlen(index->table_name),
+ NULL, TRUE) = 0;
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s",
+ status, index->name, table_name, ctx);
+
mem_heap_free(heap);
- ut_print_timestamp(stderr);
- fputs(status, stderr);
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
+func_exit:
+ if (!locked) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
}
/**********************************************************************//**
@@ -5582,7 +5616,7 @@ dict_table_get_index_on_name(
/* If name is NULL, just return */
if (!name) {
- return NULL;
+ return(NULL);
}
index = dict_table_get_first_index(table);
@@ -5597,42 +5631,47 @@ dict_table_get_index_on_name(
}
return(NULL);
-
}
/**********************************************************************//**
-Replace the index passed in with another equivalent index in the tables
-foreign key list. */
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table. */
UNIV_INTERN
void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx) /*!< in: transaction handle */
+dict_foreign_replace_index(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ const dict_index_t* index, /*!< in: index to be replaced */
+ const trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
+ ut_ad(index->to_be_dropped);
+
for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- if (foreign->foreign_index == index) {
- dict_index_t* new_index
- = dict_foreign_find_equiv_index(foreign);
+ dict_index_t* new_index;
- /* There must exist an alternative index if
- check_foreigns (FOREIGN_KEY_CHECKS) is on,
- since ha_innobase::prepare_drop_index had done
- the check before we reach here. */
+ if (foreign->foreign_index == index) {
+ ut_ad(foreign->foreign_table == index->table);
+ new_index = dict_foreign_find_index(
+ foreign->foreign_table,
+ foreign->foreign_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE, /*check_null=*/FALSE);
+ /* There must exist an alternative index,
+ since this must have been checked earlier. */
ut_a(new_index || !trx->check_foreigns);
+ ut_ad(!new_index || new_index->table == index->table);
+ ut_ad(!new_index || !new_index->to_be_dropped);
foreign->foreign_index = new_index;
}
}
-
for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
foreign;
foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
@@ -5647,8 +5686,11 @@ dict_table_replace_index_in_foreign_list(
foreign->referenced_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE, /*check_null=*/FALSE);
- ut_ad(new_index || !trx->check_foreigns);
+ /* There must exist an alternative index,
+ since this must have been checked earlier. */
+ ut_a(new_index || !trx->check_foreigns);
ut_ad(!new_index || new_index->table == index->table);
+ ut_ad(!new_index || !new_index->to_be_dropped);
foreign->referenced_index = new_index;
}
@@ -5696,8 +5738,8 @@ dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
- ibool tmp_ok) /*!< in: TRUE=allow temporary
- index names */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
{
/* Check for duplicates, ignoring indexes that are marked
as to be dropped */
@@ -5713,17 +5755,32 @@ dict_table_check_for_dup_indexes(
index1 = UT_LIST_GET_FIRST(table->indexes);
do {
- ut_ad(tmp_ok || *index1->name != TEMP_INDEX_PREFIX);
-
- index2 = UT_LIST_GET_NEXT(indexes, index1);
-
- while (index2) {
-
- if (!index2->to_be_dropped) {
- ut_ad(ut_strcmp(index1->name, index2->name));
+ if (*index1->name == TEMP_INDEX_PREFIX) {
+ ut_a(!dict_index_is_clust(index1));
+
+ switch (check) {
+ case CHECK_ALL_COMPLETE:
+ ut_error;
+ case CHECK_ABORTED_OK:
+ switch (dict_index_get_online_status(index1)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ ut_error;
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break;
+ }
+ /* fall through */
+ case CHECK_PARTIAL_OK:
+ break;
}
+ }
- index2 = UT_LIST_GET_NEXT(indexes, index2);
+ for (index2 = UT_LIST_GET_NEXT(indexes, index1);
+ index2 != NULL;
+ index2 = UT_LIST_GET_NEXT(indexes, index2)) {
+ ut_ad(ut_strcmp(index1->name, index2->name));
}
index1 = UT_LIST_GET_NEXT(indexes, index1);
@@ -5739,17 +5796,17 @@ The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
UNIV_INTERN
-enum db_err
+dberr_t
dict_table_schema_check(
/*====================*/
dict_table_schema_t* req_schema, /*!< in/out: required table
schema */
char* errstr, /*!< out: human readable error
- message if != DB_SUCCESS and
- != DB_TABLE_NOT_FOUND is
+ message if != DB_SUCCESS is
returned */
size_t errstr_sz) /*!< in: errstr size */
{
+ char buf[MAX_FULL_NAME_LEN];
dict_table_t* table;
ulint i;
@@ -5757,8 +5814,24 @@ dict_table_schema_check(
table = dict_table_get_low(req_schema->table_name);
- if (table == NULL || table->ibd_file_missing) {
- /* no such table or missing tablespace */
+ if (table == NULL) {
+ /* no such table */
+
+ ut_snprintf(errstr, errstr_sz,
+ "Table %s not found.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
+
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ if (table->ibd_file_missing) {
+ /* missing tablespace */
+
+ ut_snprintf(errstr, errstr_sz,
+ "Tablespace for table %s is missing.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
return(DB_TABLE_NOT_FOUND);
}
@@ -5769,7 +5842,8 @@ dict_table_schema_check(
ut_snprintf(errstr, errstr_sz,
"%s has %d columns but should have %lu.",
- req_schema->table_name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
table->n_def - DATA_N_SYS_COLS,
req_schema->n_cols);
@@ -5814,9 +5888,12 @@ dict_table_schema_check(
if (j == table->n_def) {
ut_snprintf(errstr, errstr_sz,
- "required column %s.%s not found.",
- req_schema->table_name,
- req_schema->columns[i].name);
+ "required column %s "
+ "not found in table %s.",
+ req_schema->columns[i].name,
+ ut_format_name(
+ req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
return(DB_ERROR);
}
@@ -5839,10 +5916,11 @@ dict_table_schema_check(
if (req_schema->columns[i].len != table->cols[j].len) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(length mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (length mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -5852,10 +5930,11 @@ dict_table_schema_check(
if (req_schema->columns[i].mtype != table->cols[j].mtype) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(type mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (type mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -5868,20 +5947,110 @@ dict_table_schema_check(
!= req_schema->columns[i].prtype_mask) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(flags mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (flags mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
}
}
+ if (req_schema->n_foreign != UT_LIST_GET_LEN(table->foreign_list)) {
+ ut_snprintf(
+ errstr, errstr_sz,
+ "Table %s has %lu foreign key(s) pointing to other "
+ "tables, but it must have %lu.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
+ UT_LIST_GET_LEN(table->foreign_list),
+ req_schema->n_foreign);
+ return(DB_ERROR);
+ }
+
+ if (req_schema->n_referenced != UT_LIST_GET_LEN(table->referenced_list)) {
+ ut_snprintf(
+ errstr, errstr_sz,
+ "There are %lu foreign key(s) pointing to %s, "
+ "but there must be %lu.",
+ UT_LIST_GET_LEN(table->referenced_list),
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
+ req_schema->n_referenced);
+ return(DB_ERROR);
+ }
+
return(DB_SUCCESS);
}
/* @} */
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+ const char* db_and_table, /*!< in: database and table names,
+ e.g. d@i1b/a@q1b@1Kc */
+ char* db_utf8, /*!< out: database name, e.g. dцb */
+ size_t db_utf8_size, /*!< in: dbname_utf8 size */
+ char* table_utf8, /*!< out: table name, e.g. aюbØc */
+ size_t table_utf8_size)/*!< in: table_utf8 size */
+{
+ char db[MAX_DATABASE_NAME_LEN + 1];
+ ulint db_len;
+ uint errors;
+
+ db_len = dict_get_db_name_len(db_and_table);
+
+ ut_a(db_len <= sizeof(db));
+
+ memcpy(db, db_and_table, db_len);
+ db[db_len] = '\0';
+
+ strconvert(
+ &my_charset_filename, db,
+ system_charset_info, db_utf8, db_utf8_size,
+ &errors);
+
+ /* convert each # to @0023 in table name and store the result in buf */
+ const char* table = dict_remove_db_name(db_and_table);
+ const char* table_p;
+ char buf[MAX_TABLE_NAME_LEN * 5 + 1];
+ char* buf_p;
+ for (table_p = table, buf_p = buf; table_p[0] != '\0'; table_p++) {
+ if (table_p[0] != '#') {
+ buf_p[0] = table_p[0];
+ buf_p++;
+ } else {
+ buf_p[0] = '@';
+ buf_p[1] = '0';
+ buf_p[2] = '0';
+ buf_p[3] = '2';
+ buf_p[4] = '3';
+ buf_p += 5;
+ }
+ ut_a((size_t) (buf_p - buf) < sizeof(buf));
+ }
+ buf_p[0] = '\0';
+
+ errors = 0;
+ strconvert(
+ &my_charset_filename, buf,
+ system_charset_info, table_utf8, table_utf8_size,
+ &errors);
+
+ if (errors != 0) {
+ ut_snprintf(table_utf8, table_utf8_size, "%s%s",
+ srv_mysql50_table_name_prefix, table);
+ }
+}
+
/**********************************************************************//**
Closes the data dictionary module. */
UNIV_INTERN
@@ -5929,7 +6098,9 @@ dict_close(void)
rw_lock_free(&dict_operation_lock);
memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock));
- mutex_free(&dict_foreign_err_mutex);
+ if (!srv_read_only_mode) {
+ mutex_free(&dict_foreign_err_mutex);
+ }
mem_free(dict_sys);
dict_sys = NULL;
@@ -5943,7 +6114,7 @@ dict_close(void)
/**********************************************************************//**
Validate the dictionary table LRU list.
@return TRUE if valid */
-UNIV_INTERN
+static
ibool
dict_lru_validate(void)
/*===================*/
@@ -5972,7 +6143,7 @@ dict_lru_validate(void)
/**********************************************************************//**
Check if a table exists in the dict table LRU list.
@return TRUE if table found in LRU list */
-UNIV_INTERN
+static
ibool
dict_lru_find_table(
/*================*/
@@ -6025,4 +6196,279 @@ dict_non_lru_find_table(
return(FALSE);
}
# endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Check an index to see whether its first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
+@return true if the index qualifies, otherwise false */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*=======================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+{
+ ulint i;
+
+ if (dict_index_get_n_fields(index) < n_cols) {
+ return(false);
+ }
+
+ for (i= 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+
+ field = dict_index_get_nth_field(index, i);
+
+ col_name = dict_table_get_col_name(
+ table, dict_col_get_no(field->col));
+
+ if (field->prefix_len != 0) {
+ /* We do not accept column prefix
+ indexes here */
+
+ break;
+ }
+
+ if (0 != innobase_strcasecmp(columns[i],
+ col_name)) {
+ break;
+ }
+
+ if (check_null
+ && (field->col->prtype & DATA_NOT_NULL)) {
+
+ break;
+ }
+
+ if (types_idx && !cmp_cols_are_equal(
+ dict_index_get_nth_col(index, i),
+ dict_index_get_nth_col(types_idx,
+ i),
+ check_charsets)) {
+
+ break;
+ }
+ }
+
+ return((i == n_cols) ? true : false);
+}
+
+/*********************************************************************//**
+Update the state of compression failure padding heuristics. This is
+called whenever a compression operation succeeds or fails.
+The caller must be holding info->mutex */
+static
+void
+dict_index_zip_pad_update(
+/*======================*/
+ zip_pad_info_t* info, /*<! in/out: info to be updated */
+ ulint zip_threshold) /*<! in: zip threshold value */
+{
+ ulint total;
+ ulint fail_pct;
+
+ ut_ad(info);
+
+ total = info->success + info->failure;
+
+ ut_ad(total > 0);
+
+ if(zip_threshold == 0) {
+ /* User has just disabled the padding. */
+ return;
+ }
+
+ if (total < ZIP_PAD_ROUND_LEN) {
+ /* We are in middle of a round. Do nothing. */
+ return;
+ }
+
+ /* We are at a 'round' boundary. Reset the values but first
+ calculate fail rate for our heuristic. */
+ fail_pct = (info->failure * 100) / total;
+ info->failure = 0;
+ info->success = 0;
+
+ if (fail_pct > zip_threshold) {
+ /* Compression failures are more then user defined
+ threshold. Increase the pad size to reduce chances of
+ compression failures. */
+ ut_ad(info->pad % ZIP_PAD_INCR == 0);
+
+ /* Only do increment if it won't increase padding
+ beyond max pad size. */
+ if (info->pad + ZIP_PAD_INCR
+ < (UNIV_PAGE_SIZE * zip_pad_max) / 100) {
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* Use atomics even though we have the mutex.
+ This is to ensure that we are able to read
+ info->pad atomically where atomics are
+ supported. */
+ os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR);
+#else /* HAVE_ATOMIC_BUILTINS */
+ info->pad += ZIP_PAD_INCR;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ MONITOR_INC(MONITOR_PAD_INCREMENTS);
+ }
+
+ info->n_rounds = 0;
+
+ } else {
+ /* Failure rate was OK. Another successful round
+ completed. */
+ ++info->n_rounds;
+
+ /* If enough successful rounds are completed with
+ compression failure rate in control, decrease the
+ padding. */
+ if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT
+ && info->pad > 0) {
+
+ ut_ad(info->pad % ZIP_PAD_INCR == 0);
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* Use atomics even though we have the mutex.
+ This is to ensure that we are able to read
+ info->pad atomically where atomics are
+ supported. */
+ os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR);
+#else /* HAVE_ATOMIC_BUILTINS */
+ info->pad -= ZIP_PAD_INCR;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ info->n_rounds = 0;
+
+ MONITOR_INC(MONITOR_PAD_DECREMENTS);
+ }
+ }
+}
+
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+{
+ ut_ad(index);
+
+ ulint zip_threshold = zip_failure_threshold_pct;
+ if (!zip_threshold) {
+ /* Disabled by user. */
+ return;
+ }
+
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ ++index->zip_pad.success;
+ dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+}
+
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+{
+ ut_ad(index);
+
+ ulint zip_threshold = zip_failure_threshold_pct;
+ if (!zip_threshold) {
+ /* Disabled by user. */
+ return;
+ }
+
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ ++index->zip_pad.failure;
+ dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+}
+
+
+/*********************************************************************//**
+Return the optimal page size, for which page will likely compress.
+@return page size beyond which page might not compress */
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+ dict_index_t* index) /*!< in: index for which page size
+ is requested */
+{
+ ulint pad;
+ ulint min_sz;
+ ulint sz;
+
+ ut_ad(index);
+
+ if (!zip_failure_threshold_pct) {
+ /* Disabled by user. */
+ return(UNIV_PAGE_SIZE);
+ }
+
+ /* We use atomics to read index->zip_pad.pad. Here we use zero
+ as increment as are not changing the value of the 'pad'. On
+ platforms where atomics are not available we grab the mutex. */
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0);
+#else /* HAVE_ATOMIC_BUILTINS */
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ pad = index->zip_pad.pad;
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ ut_ad(pad < UNIV_PAGE_SIZE);
+ sz = UNIV_PAGE_SIZE - pad;
+
+ /* Min size allowed by user. */
+ ut_ad(zip_pad_max < 100);
+ min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100;
+
+ return(ut_max(sz, min_sz));
+}
+
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name. */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+ ulint table_flag) /*!< in: row format setting */
+{
+ switch (dict_tf_get_rec_format(table_flag)) {
+ case REC_FORMAT_REDUNDANT:
+ return("ROW_TYPE_REDUNDANT");
+ case REC_FORMAT_COMPACT:
+ return("ROW_TYPE_COMPACT");
+ case REC_FORMAT_COMPRESSED:
+ return("ROW_TYPE_COMPRESSED");
+ case REC_FORMAT_DYNAMIC:
+ return("ROW_TYPE_DYNAMIC");
+ }
+
+ ut_error;
+ return(0);
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index ff93be3e76a..46d72786ac6 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -41,18 +41,22 @@ Created 4/24/1996 Heikki Tuuri
#include "rem0cmp.h"
#include "srv0start.h"
#include "srv0srv.h"
+#include "dict0crea.h"
#include "dict0priv.h"
#include "ha_prototypes.h" /* innobase_casedn_str() */
#include "fts0priv.h"
-/** Following are six InnoDB system tables */
+/** Following are the InnoDB system tables. The positions in
+this array are referenced by enum dict_system_table_id. */
static const char* SYSTEM_TABLE_NAME[] = {
"SYS_TABLES",
"SYS_INDEXES",
"SYS_COLUMNS",
"SYS_FIELDS",
"SYS_FOREIGN",
- "SYS_FOREIGN_COLS"
+ "SYS_FOREIGN_COLS",
+ "SYS_TABLESPACES",
+ "SYS_DATAFILES"
};
/* If this flag is TRUE, then we will load the cluster index's (and tables')
@@ -183,7 +187,8 @@ dict_print(void)
os_increment_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
heap = mem_heap_create(1000);
mutex_enter(&(dict_sys->mutex));
@@ -196,13 +201,11 @@ dict_print(void)
err_msg = static_cast<const char*>(
dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table,
- static_cast<dict_table_info_t>(
- DICT_TABLE_LOAD_FROM_CACHE
- | DICT_TABLE_UPDATE_STATS), &mtr));
+ heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE,
+ &mtr));
if (!err_msg) {
- dict_table_print_low(table);
+ dict_table_print(table);
} else {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: %s\n", err_msg);
@@ -221,7 +224,8 @@ dict_print(void)
/* Restore the fatal semaphore wait timeout */
os_decrement_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
}
/********************************************************************//**
@@ -278,8 +282,8 @@ dict_startscan_system(
clust_index = UT_LIST_GET_FIRST(system_table->indexes);
- btr_pcur_open_at_index_side(TRUE, clust_index, BTR_SEARCH_LEAF, pcur,
- TRUE, mtr);
+ btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, pcur,
+ true, 0, mtr);
rec = dict_getnext_system_low(pcur, mtr);
@@ -307,6 +311,7 @@ dict_getnext_system(
return(rec);
}
+
/********************************************************************//**
This function processes one SYS_TABLES record and populate the dict_table_t
struct for the table. Extracted out of dict_print() to be used by
@@ -362,15 +367,6 @@ dict_process_sys_tables_rec_and_mtr_commit(
return(err_msg);
}
- if ((status & DICT_TABLE_UPDATE_STATS)
- && dict_table_get_first_index(*table)) {
-
- /* Update statistics member fields in *table if
- DICT_TABLE_UPDATE_STATS is set */
- ut_ad(mutex_own(&dict_sys->mutex));
- dict_stats_update(*table, DICT_STATS_FETCH, TRUE);
- }
-
return(NULL);
}
@@ -401,6 +397,7 @@ dict_process_sys_indexes_rec(
return(err_msg);
}
+
/********************************************************************//**
This function parses a SYS_COLUMNS record and populate a dict_column_t
structure with the information from the record.
@@ -423,6 +420,7 @@ dict_process_sys_columns_rec(
return(err_msg);
}
+
/********************************************************************//**
This function parses a SYS_FIELDS record and populates a dict_field_t
structure with the information from the record.
@@ -475,7 +473,7 @@ dict_process_sys_foreign_rec(
const byte* field;
ulint n_fields_and_type;
- if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+ if (rec_get_deleted_flag(rec, 0)) {
return("delete-marked record in SYS_FOREIGN");
}
@@ -485,7 +483,7 @@ dict_process_sys_foreign_rec(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__ID, &len);
- if (UNIV_UNLIKELY(len < 1 || len == UNIV_SQL_NULL)) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_FOREIGN");
}
@@ -512,7 +510,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
foreign->foreign_table_name = mem_heap_strdupl(
@@ -520,7 +518,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
foreign->referenced_table_name = mem_heap_strdupl(
@@ -568,7 +566,7 @@ dict_process_sys_foreign_col_rec(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_FOREIGN_COLS");
}
@@ -594,14 +592,14 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
*for_col_name = mem_heap_strdupl(heap, (char*) field, len);
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
*ref_col_name = mem_heap_strdupl(heap, (char*) field, len);
@@ -610,6 +608,127 @@ err_len:
}
/********************************************************************//**
+This function parses a SYS_TABLESPACES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
+ ulint* space, /*!< out: space id */
+ const char** name, /*!< out: tablespace name */
+ ulint* flags) /*!< out: tablespace flags */
+{
+ ulint len;
+ const byte* field;
+
+ /* Initialize the output values */
+ *space = ULINT_UNDEFINED;
+ *name = NULL;
+ *flags = ULINT_UNDEFINED;
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return("delete-marked record in SYS_TABLESPACES");
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) {
+ return("wrong number of columns in SYS_TABLESPACES record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+ if (len != DICT_FLD_LEN_SPACE) {
+err_len:
+ return("incorrect column length in SYS_TABLESPACES");
+ }
+ *space = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ *name = mem_heap_strdupl(heap, (char*) field, len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len);
+ if (len != DICT_FLD_LEN_FLAGS) {
+ goto err_len;
+ }
+ *flags = mach_read_from_4(field);
+
+ return(NULL);
+}
+
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
+ ulint* space, /*!< out: space id */
+ const char** path) /*!< out: datafile path */
+{
+ ulint len;
+ const byte* field;
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return("delete-marked record in SYS_DATAFILES");
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) {
+ return("wrong number of columns in SYS_DATAFILES record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
+ if (len != DICT_FLD_LEN_SPACE) {
+err_len:
+ return("incorrect column length in SYS_DATAFILES");
+ }
+ *space = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ *path = mem_heap_strdupl(heap, (char*) field, len);
+
+ return(NULL);
+}
+
+/********************************************************************//**
Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS.
@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */
static
@@ -629,11 +748,9 @@ dict_sys_tables_get_flags(
ut_a(len == 4);
type = mach_read_from_4(field);
- /* The low order bit of SYS_TABLES.TYPE is always set to 1. If no
- other bits are used, that is defined as SYS_TABLE_TYPE_ANTELOPE.
- But in dict_table_t::flags the low order bit is used to determine
- if the row format is Redundant or Compact when the format is
- Antelope.
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
+ dict_table_t::flags the low order bit is used to determine if the
+ row format is Redundant or Compact when the format is Antelope.
Read the 4 byte N_COLS field and look at the high order bit. It
should be set for COMPACT and later. It should not be set for
REDUNDANT. */
@@ -645,10 +762,193 @@ dict_sys_tables_get_flags(
/* This validation function also combines the DICT_N_COLS_COMPACT
flag in n_cols into the type field to effectively make it a
dict_table_t::flags. */
- return(dict_sys_tables_type_validate(type, n_cols));
+
+ if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) {
+ return(ULINT_UNDEFINED);
+ }
+
+ return(dict_sys_tables_type_to_tf(type, n_cols));
}
/********************************************************************//**
+Gets the filepath for a spaceid from SYS_DATAFILES and checks it against
+the contents of a link file. This function is called when there is no
+fil_node_t entry for this space ID so both durable locations on disk
+must be checked and compared.
+We use a temporary heap here for the table lookup, but not for the path
+returned which the caller must free.
+This function can return NULL if the space ID is not found in SYS_DATAFILES,
+then the caller will assume that the ibd file is in the normal datadir.
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+ ulint space, /*!< in: space id */
+ const char* name) /*!< in: tablespace name */
+{
+ mtr_t mtr;
+ dict_table_t* sys_datafiles;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ char* dict_filepath = NULL;
+ mem_heap_t* heap = mem_heap_create(1024);
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ mtr_start(&mtr);
+
+ sys_datafiles = dict_table_get_low("SYS_DATAFILES");
+ sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes);
+ ut_ad(!dict_table_is_comp(sys_datafiles));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__SPACE, "SPACE"));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__PATH, "PATH"));
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ mach_write_to_4(buf, space);
+
+ dfield_set_data(dfield, buf, 4);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* If the file-per-table tablespace was created with
+ an earlier version of InnoDB, then this record is not
+ in SYS_DATAFILES. But a link file still might exist. */
+
+ if (btr_pcur_is_on_user_rec(&pcur)) {
+ /* A record for this space ID was found. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+ ut_a(len > 0 || len == UNIV_SQL_NULL);
+ ut_a(len < OS_FILE_MAX_PATH);
+ dict_filepath = mem_strdupl((char*) field, len);
+ ut_a(dict_filepath);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(dict_filepath);
+}
+
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return DB_SUCCESS if OK, dberr_t if the update failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+ ulint space_id, /*!< in: space id */
+ const char* filepath) /*!< in: filepath */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "update filepath";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "space", space_id);
+ pars_info_add_str_literal(info, "path", filepath);
+
+ err = que_eval_sql(info,
+ "PROCEDURE UPDATE_FILEPATH () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :path\n"
+ " WHERE SPACE = :space;\n"
+ "END;\n", FALSE, trx);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ if (err == DB_SUCCESS) {
+ /* We just updated SYS_DATAFILES due to the contents in
+ a link file. Make a note that we did this. */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The InnoDB data dictionary table SYS_DATAFILES "
+ "for tablespace ID %lu was updated to use file %s.",
+ (ulong) space_id, filepath);
+ } else {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Problem updating InnoDB data dictionary table "
+ "SYS_DATAFILES for tablespace ID %lu to file %s.",
+ (ulong) space_id, filepath);
+ }
+
+ return(err);
+}
+
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+ ulint space, /*!< in: space id */
+ const char* name, /*!< in: tablespace name */
+ const char* filepath, /*!< in: filepath */
+ ulint fsp_flags) /*!< in: tablespace flags */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(filepath);
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "insert tablespace and filepath";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ /* A record for this space ID was not found in
+ SYS_DATAFILES. Assume the record is also missing in
+ SYS_TABLESPACES. Insert records onto them both. */
+ err = dict_create_add_tablespace_to_dictionary(
+ space, name, fsp_flags, filepath, trx, false);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ return(err);
+}
+
+/********************************************************************//**
+This function looks at each table defined in SYS_TABLES. It checks the
+tablespace for any table with a space_id > 0. It looks up the tablespace
+in SYS_DATAFILES to ensure the correct path.
+
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
@@ -669,6 +969,7 @@ dict_check_tablespaces_and_store_max_id(
ulint max_space_id;
mtr_t mtr;
+ rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
mtr_start(&mtr);
@@ -682,8 +983,8 @@ dict_check_tablespaces_and_store_max_id(
MLOG_4BYTES, &mtr);
fil_set_max_space_id_if_bigger(max_space_id);
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
+ btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur,
+ true, 0, &mtr);
loop:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
@@ -703,6 +1004,7 @@ loop:
fil_set_max_space_id_if_bigger(max_space_id);
mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&dict_operation_lock);
return;
}
@@ -718,8 +1020,14 @@ loop:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
+
name = mem_strdupl((char*) field, len);
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), name, FALSE);
+
flags = dict_sys_tables_get_flags(rec);
if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
/* Read again the 4 bytes from rec. */
@@ -728,13 +1036,9 @@ loop:
ut_ad(len == 4); /* this was checked earlier */
flags = mach_read_from_4(field);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table '%s' in InnoDB data dictionary"
+ " has unknown type %lx", table_name, flags);
goto loop;
}
@@ -749,43 +1053,84 @@ loop:
mtr_commit(&mtr);
+ /* For tables created with old versions of InnoDB,
+ SYS_TABLES.MIX_LEN may contain garbage. Such tables
+ would always be in ROW_FORMAT=REDUNDANT. Pretend that
+ all such tables are non-temporary. That is, do not
+ suppress error printouts about temporary or discarded
+ tablespaces not being found. */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+
+ bool is_temp = false;
+ bool discarded = false;
+ ib_uint32_t flags2 = mach_read_from_4(field);
+
+ /* Check that the tablespace (the .ibd file) really
+ exists; print a warning to the .err log if not.
+ Do not print warnings for temporary tables or for
+ tablespaces that have been discarded. */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+
+ /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */
+ if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
+
+ is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
+ discarded = !!(flags2 & DICT_TF2_DISCARDED);
+ }
+
if (space_id == 0) {
/* The system tablespace always exists. */
+ ut_ad(!discarded);
} else if (in_crash_recovery) {
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not.
- Do not print warnings for temporary tables. */
- ibool is_temp;
+ /* All tablespaces should have been found in
+ fil_load_single_table_tablespaces(). */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
- /* ROW_FORMAT=COMPACT: read the is_temp
- flag from SYS_TABLES.MIX_LEN. */
- field = rec_get_nth_field_old(
- rec, 7/*MIX_LEN*/, &len);
- is_temp = !!(mach_read_from_4(field)
- & DICT_TF2_TEMPORARY);
- } else {
- /* For tables created with old versions
- of InnoDB, SYS_TABLES.MIX_LEN may contain
- garbage. Such tables would always be
- in ROW_FORMAT=REDUNDANT. Pretend that
- all such tables are non-temporary. That is,
- do not suppress error printouts about
- temporary tables not being found. */
- is_temp = FALSE;
+ fil_space_for_table_exists_in_mem(
+ space_id, name, TRUE, !(is_temp || discarded),
+ false, NULL, 0);
+
+ } else if (!discarded) {
+
+ /* It is a normal database startup: create the
+ space object and check that the .ibd file exists.
+ If the table uses a remote tablespace, look for the
+ space_id in SYS_DATAFILES to find the filepath */
+
+ /* Use the remote filepath if known. */
+ char* filepath = NULL;
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ filepath = dict_get_first_path(
+ space_id, name);
}
- fil_space_for_table_exists_in_mem(
- space_id, name, TRUE, !is_temp);
- } else {
- /* It is a normal database startup: create the space
- object and check that the .ibd file exists. */
+ /* We set the 2nd param (fix_dict = true)
+ here because we already have an x-lock on
+ dict_operation_lock and dict_sys->mutex. Besides,
+ this is at startup and we are now single threaded.
+ If the filepath is not known, it will need to
+ be discovered. */
+ dberr_t err = fil_open_single_table_tablespace(
+ false, srv_read_only_mode ? false : true,
+ space_id, dict_tf_to_fsp_flags(flags),
+ name, filepath);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace open failed for '%s', "
+ "ignored.", table_name);
+ }
- fil_open_single_table_tablespace(
- FALSE, space_id,
- dict_tf_to_fsp_flags(flags), name);
+ if (filepath) {
+ mem_free(filepath);
+ }
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "DISCARD flag set for table '%s', ignored.",
+ table_name);
}
mem_free(name);
@@ -879,7 +1224,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_COLUMNS__NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
@@ -1003,6 +1348,11 @@ dict_load_columns(
err_msg = dict_load_column_low(table, heap, NULL, NULL,
&name, rec);
+ if (err_msg) {
+ fprintf(stderr, "InnoDB: %s\n", err_msg);
+ ut_error;
+ }
+
/* Note: Currently we have one DOC_ID column that is
shared by all FTS indexes on a table. */
if (innobase_strcasecmp(name,
@@ -1037,11 +1387,6 @@ dict_load_columns(
table->fts->doc_col = i;
}
- if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
- ut_error;
- }
-
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
@@ -1154,7 +1499,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
@@ -1194,7 +1539,7 @@ dict_load_fields(
byte* buf;
ulint i;
mtr_t mtr;
- ulint error;
+ dberr_t error;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1394,8 +1739,8 @@ Loads definitions for table indexes. Adds them to the data dictionary
cache.
@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
table or DB_UNSUPPORTED if table has unknown index type */
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
dict_load_indexes(
/*==============*/
dict_table_t* table, /*!< in/out: table */
@@ -1412,7 +1757,7 @@ dict_load_indexes(
const rec_t* rec;
byte* buf;
mtr_t mtr;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1443,6 +1788,21 @@ dict_load_indexes(
if (!btr_pcur_is_on_user_rec(&pcur)) {
+ /* We should allow the table to open even
+ without index when DICT_ERR_IGNORE_CORRUPT is set.
+ DICT_ERR_IGNORE_CORRUPT is currently only set
+ for drop table */
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot load table %s "
+ "because it has no indexes in "
+ "InnoDB internal data dictionary.",
+ table->name);
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
break;
}
@@ -1456,6 +1816,20 @@ dict_load_indexes(
if (err_msg == dict_load_index_id_err) {
/* TABLE_ID mismatch means that we have
run out of index definitions for the table. */
+
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Failed to load the "
+ "clustered index for table %s "
+ "because of the following error: %s. "
+ "Refusing to load the rest of the "
+ "indexes (if any) and the whole table "
+ "altogether.", table->name, err_msg);
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
break;
} else if (err_msg == dict_load_index_del) {
/* Skip delete-marked records. */
@@ -1510,15 +1884,15 @@ dict_load_indexes(
subsequent checks are relevant for the supported types. */
if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
| DICT_CORRUPT | DICT_FTS)) {
- fprintf(stderr,
- "InnoDB: Error: unknown type %lu"
- " of index %s of table %s\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown type %lu of index %s of table %s",
(ulong) index->type, index->name, table->name);
error = DB_UNSUPPORTED;
dict_mem_index_free(index);
goto func_exit;
} else if (index->page == FIL_NULL
+ && !table->ibd_file_missing
&& (!(index->type & DICT_FTS))) {
fprintf(stderr,
@@ -1560,7 +1934,7 @@ corrupted:
" is not clustered!\n", stderr);
goto corrupted;
- } else if (table->id < DICT_HDR_FIRST_ID
+ } else if (dict_is_sys_table(table->id)
&& (dict_index_is_clust(index)
|| ((table == dict_sys->sys_tables)
&& !strcmp("ID_IND", index->name)))) {
@@ -1570,8 +1944,10 @@ corrupted:
dict_mem_index_free(index);
} else {
dict_load_fields(index, heap);
- error = dict_index_add_to_cache(table, index,
- index->page, FALSE);
+
+ error = dict_index_add_to_cache(
+ table, index, index->page, FALSE);
+
/* The data dictionary tables should never contain
invalid index definitions. If we ignored this error
and simply did not load this index definition, the
@@ -1629,7 +2005,7 @@ dict_load_table_low(
rec_get_nth_field_offs_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_TABLES");
}
@@ -1751,6 +2127,77 @@ err_len:
}
/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and replace the 'databasename/tablename.ibd'
+portion with 'tablename'.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ char* filepath) /*!< in: filepath of tablespace */
+{
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
+
+ ut_a(!table->data_dir_path);
+ ut_a(filepath);
+
+ /* Be sure this filepath is not the default filepath. */
+ char* default_filepath = fil_make_ibd_name(table->name, false);
+ if (strcmp(filepath, default_filepath)) {
+ ulint pathlen = strlen(filepath);
+ ut_a(pathlen < OS_FILE_MAX_PATH);
+ ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd"));
+
+ table->data_dir_path = mem_heap_strdup(table->heap, filepath);
+ os_file_make_data_dir_path(table->data_dir_path);
+ } else {
+ /* This does not change SYS_DATAFILES or SYS_TABLES
+ or FSP_FLAGS on the header page of the tablespace,
+ but it makes dict_table_t consistent */
+ table->flags &= ~DICT_TF_MASK_DATA_DIR;
+ }
+ mem_free(default_filepath);
+}
+
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool dict_mutex_own) /*!< in: true if dict_sys->mutex
+ is owned already */
+{
+ if (DICT_TF_HAS_DATA_DIR(table->flags)
+ && (!table->data_dir_path)) {
+ char* path = fil_space_get_first_path(table->space);
+
+ if (!dict_mutex_own) {
+ dict_mutex_enter_for_mysql();
+ }
+ if (!path) {
+ path = dict_get_first_path(
+ table->space, table->name);
+ }
+
+ if (path) {
+ dict_save_data_dir_path(table, path);
+ mem_free(path);
+ }
+
+ if (!dict_mutex_own) {
+ dict_mutex_exit_for_mysql();
+ }
+ }
+}
+
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
@@ -1770,6 +2217,7 @@ dict_load_table(
/*!< in: error to be ignored when loading
table and its indexes' definition */
{
+ dberr_t err;
dict_table_t* table;
dict_table_t* sys_tables;
btr_pcur_t pcur;
@@ -1780,7 +2228,7 @@ dict_load_table(
const rec_t* rec;
const byte* field;
ulint len;
- ulint err;
+ char* filepath = NULL;
const char* err_msg;
mtr_t mtr;
@@ -1843,39 +2291,71 @@ err_exit:
goto err_exit;
}
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(table_name, sizeof(table_name), name, FALSE);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
if (table->space == 0) {
/* The system tablespace is always available. */
+ } else if (table->flags2 & DICT_TF2_DISCARDED) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Table '%s' tablespace is set as discarded.",
+ table_name);
+
+ table->ibd_file_missing = TRUE;
+
} else if (!fil_space_for_table_exists_in_mem(
- table->space, name, FALSE, FALSE)) {
+ table->space, name, FALSE, FALSE, true, heap,
+ table->id)) {
- if (table->flags2 & DICT_TF2_TEMPORARY) {
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
/* Do not bother to retry opening temporary tables. */
table->ibd_file_missing = TRUE;
+
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: space object of table ");
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: space id %lu did not exist in memory."
- " Retrying an open.\n",
- (ulong) table->space);
- /* Try to open the tablespace */
- if (!fil_open_single_table_tablespace(
- TRUE, table->space,
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to find tablespace for table '%s' "
+ "in the cache. Attempting to load the "
+ "tablespace with space id %lu.",
+ table_name, (ulong) table->space);
+
+ /* Use the remote filepath if needed. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ /* This needs to be added to the table
+ from SYS_DATAFILES */
+ dict_get_and_save_data_dir_path(table, true);
+
+ if (table->data_dir_path) {
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path,
+ table->name, "ibd");
+ }
+ }
+
+ /* Try to open the tablespace. We set the
+ 2nd param (fix_dict = false) here because we
+ do not have an x-lock on dict_operation_lock */
+ err = fil_open_single_table_tablespace(
+ true, false, table->space,
dict_tf_to_fsp_flags(table->flags),
- name)) {
+ name, filepath);
+
+ if (err != DB_SUCCESS) {
/* We failed to find a sensible
tablespace file */
table->ibd_file_missing = TRUE;
}
+ if (filepath) {
+ mem_free(filepath);
+ }
}
}
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
dict_load_columns(table, heap);
if (cached) {
@@ -1886,7 +2366,15 @@ err_exit:
mem_heap_empty(heap);
- err = dict_load_indexes(table, heap, ignore_err);
+ /* If there is no tablespace for the table then we only need to
+ load the index definitions. So that we can IMPORT the tablespace
+ later. */
+ if (table->ibd_file_missing) {
+ err = dict_load_indexes(
+ table, heap, DICT_ERR_IGNORE_ALL);
+ } else {
+ err = dict_load_indexes(table, heap, ignore_err);
+ }
if (err == DB_INDEX_CORRUPT) {
/* Refuse to load the table if the table has a corrupted
@@ -1920,7 +2408,8 @@ err_exit:
of the error condition, since the user may want to dump data from the
clustered index. However we load the foreign key information only if
all indexes were loaded. */
- if (!cached) {
+ if (!cached || table->ibd_file_missing) {
+ /* Don't attempt to load the indexes from disk. */
} else if (err == DB_SUCCESS) {
err = dict_load_foreigns(table->name, TRUE, TRUE);
@@ -1937,11 +2426,15 @@ err_exit:
Otherwise refuse to load the table */
index = dict_table_get_first_index(table);
- if (!srv_force_recovery || !index
+ if (!srv_force_recovery
+ || !index
|| !dict_index_is_clust(index)) {
+
dict_table_remove_from_cache(table);
table = NULL;
- } else if (dict_index_is_corrupted(index)) {
+
+ } else if (dict_index_is_corrupted(index)
+ && !table->ibd_file_missing) {
/* It is possible we force to load a corrupted
clustered index if srv_load_corrupted is set.
@@ -1949,36 +2442,28 @@ err_exit:
table->corrupted = TRUE;
}
}
-#if 0
- if (err != DB_SUCCESS && table != NULL) {
- mutex_enter(&dict_foreign_err_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: could not make a foreign key"
- " definition to match\n"
- "InnoDB: the foreign key table"
- " or the referenced table!\n"
- "InnoDB: The data dictionary of InnoDB is corrupt."
- " You may need to drop\n"
- "InnoDB: and recreate the foreign key table"
- " or the referenced table.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Latest foreign key error printout:\n%s\n",
- dict_foreign_err_buf);
-
- mutex_exit(&dict_foreign_err_mutex);
- }
-#endif /* 0 */
func_exit:
mem_heap_free(heap);
- ut_ad(!table || ignore_err != DICT_ERR_IGNORE_NONE
+ ut_ad(!table
+ || ignore_err != DICT_ERR_IGNORE_NONE
+ || table->ibd_file_missing
|| !table->corrupted);
+ if (table && table->fts) {
+ if (!(dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) {
+ /* the table->fts could be created in dict_load_column
+ when a user defined FTS_DOC_ID is present, but no
+ FTS */
+ fts_free(table);
+ } else {
+ fts_optimize_add_table(table);
+ }
+ }
+
return(table);
}
@@ -2019,6 +2504,7 @@ dict_load_table_on_id(
sys_table_ids = dict_table_get_next_index(
dict_table_get_first_index(sys_tables));
ut_ad(!dict_table_is_comp(sys_tables));
+ ut_ad(!dict_index_is_clust(sys_table_ids));
heap = mem_heap_create(256);
tuple = dtuple_create(heap, 1);
@@ -2099,15 +2585,20 @@ dict_load_sys_table(
}
/********************************************************************//**
-Loads foreign key constraint col names (also for the referenced table). */
+Loads foreign key constraint col names (also for the referenced table).
+Members that must be set (and valid) in foreign:
+foreign->heap
+foreign->n_fields
+foreign->id ('\0'-terminated)
+Members that will be created and set by this function:
+foreign->foreign_col_names[i]
+foreign->referenced_col_names[i]
+(for i=0..foreign->n_fields-1) */
static
void
dict_load_foreign_cols(
/*===================*/
- const char* id, /*!< in: foreign constraint id, not
- necessary '\0'-terminated */
- ulint id_len, /*!< in: id length */
- dict_foreign_t* foreign)/*!< in: foreign constraint object */
+ dict_foreign_t* foreign)/*!< in/out: foreign constraint object */
{
dict_table_t* sys_foreign_cols;
dict_index_t* sys_index;
@@ -2119,9 +2610,12 @@ dict_load_foreign_cols(
ulint len;
ulint i;
mtr_t mtr;
+ size_t id_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
+ id_len = strlen(foreign->id);
+
foreign->foreign_col_names = static_cast<const char**>(
mem_heap_alloc(foreign->heap,
foreign->n_fields * sizeof(void*)));
@@ -2140,7 +2634,7 @@ dict_load_foreign_cols(
tuple = dtuple_create(foreign->heap, 1);
dfield = dtuple_get_nth_field(tuple, 0);
- dfield_set_data(dfield, id, id_len);
+ dfield_set_data(dfield, foreign->id, id_len);
dict_index_copy_types(tuple, sys_index, 1);
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
@@ -2154,8 +2648,42 @@ dict_load_foreign_cols(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
- ut_a(len == id_len);
- ut_a(ut_memcmp(id, field, len) == 0);
+
+ if (len != id_len || ut_memcmp(foreign->id, field, len) != 0) {
+ const rec_t* pos;
+ ulint pos_len;
+ const rec_t* for_col_name;
+ ulint for_col_name_len;
+ const rec_t* ref_col_name;
+ ulint ref_col_name_len;
+
+ pos = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__POS,
+ &pos_len);
+
+ for_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME,
+ &for_col_name_len);
+
+ ref_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME,
+ &ref_col_name_len);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to load columns names for foreign "
+ "key '%s' because it was not found in "
+ "InnoDB internal table SYS_FOREIGN_COLS. The "
+ "closest entry we found is: "
+ "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', "
+ "REF_COL_NAME='%.*s')",
+ foreign->id,
+ (int) len, field,
+ mach_read_from_4(pos),
+ (int) for_col_name_len, for_col_name,
+ (int) ref_col_name_len, ref_col_name);
+
+ ut_error;
+ }
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len);
@@ -2182,13 +2710,12 @@ dict_load_foreign_cols(
/***********************************************************************//**
Loads a foreign key constraint to the dictionary cache.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_load_foreign(
/*==============*/
- const char* id, /*!< in: foreign constraint id, not
- necessary '\0'-terminated */
- ulint id_len, /*!< in: id length */
+ const char* id, /*!< in: foreign constraint id, must be
+ '\0'-terminated */
ibool check_charsets,
/*!< in: TRUE=check charset compatibility */
ibool check_recursive)
@@ -2210,9 +2737,12 @@ dict_load_foreign(
mtr_t mtr;
dict_table_t* for_table;
dict_table_t* ref_table;
+ size_t id_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
+ id_len = strlen(id);
+
heap2 = mem_heap_create(1000);
mtr_start(&mtr);
@@ -2238,8 +2768,8 @@ dict_load_foreign(
fprintf(stderr,
"InnoDB: Error: cannot load foreign constraint "
- "%.*s: could not find the relevant record in "
- "SYS_FOREIGN\n", (int) id_len, id);
+ "%s: could not find the relevant record in "
+ "SYS_FOREIGN\n", id);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -2255,8 +2785,8 @@ dict_load_foreign(
fprintf(stderr,
"InnoDB: Error: cannot load foreign constraint "
- "%.*s: found %.*s instead in SYS_FOREIGN\n",
- (int) id_len, id, (int) len, field);
+ "%s: found %.*s instead in SYS_FOREIGN\n",
+ id, (int) len, field);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -2301,7 +2831,7 @@ dict_load_foreign(
btr_pcur_close(&pcur);
mtr_commit(&mtr);
- dict_load_foreign_cols(id, id_len, foreign);
+ dict_load_foreign_cols(foreign);
ref_table = dict_table_check_if_in_cache_low(
foreign->referenced_table_name_lookup);
@@ -2371,7 +2901,7 @@ cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
@@ -2389,7 +2919,7 @@ dict_load_foreigns(
const rec_t* rec;
const byte* field;
ulint len;
- ulint err;
+ dberr_t err;
mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -2414,6 +2944,7 @@ dict_load_foreigns(
sec_index = dict_table_get_next_index(
dict_table_get_first_index(sys_foreign));
+ ut_ad(!dict_index_is_clust(sec_index));
start_load:
tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1);
@@ -2436,7 +2967,6 @@ loop:
/* Now we have the record in the secondary index containing a table
name and a foreign constraint ID */
- rec = btr_pcur_get_rec(&pcur);
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len);
@@ -2475,14 +3005,21 @@ loop:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len);
+ /* Copy the string because the page may be modified or evicted
+ after mtr_commit() below. */
+ char fk_id[MAX_TABLE_NAME_LEN + 1];
+
+ ut_a(len <= MAX_TABLE_NAME_LEN);
+ memcpy(fk_id, field, len);
+ fk_id[len] = '\0';
+
btr_pcur_store_position(&pcur, &mtr);
mtr_commit(&mtr);
/* Load the foreign constraint definition to the dictionary cache */
- err = dict_load_foreign((char*) field, len, check_charsets,
- check_recursive);
+ err = dict_load_foreign(fk_id, check_charsets, check_recursive);
if (err != DB_SUCCESS) {
btr_pcur_close(&pcur);
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 28b935d2e58..116a6a6d96a 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,8 +36,9 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.h"
#include "fts0priv.h"
#ifndef UNIV_HOTBACKUP
-#include "ha_prototypes.h" /* innobase_casedn_str(),
+# include "ha_prototypes.h" /* innobase_casedn_str(),
innobase_get_lower_case_table_names */
+# include "mysql_com.h" /* NAME_LEN */
# include "lock0lock.h"
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
@@ -51,6 +53,10 @@ Created 1/8/1996 Heikki Tuuri
UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
#endif /* UNIV_PFS_MUTEX */
+/** Prefix for tmp tables, adopted from sql/table.h */
+#define tmp_file_prefix "#sql"
+#define tmp_file_prefix_length 4
+
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
@@ -60,9 +66,7 @@ dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index of
- the table is placed; this parameter is
- ignored if the table is made a member of
- a cluster */
+ the table is placed */
ulint n_cols, /*!< in: number of columns */
ulint flags, /*!< in: table flags */
ulint flags2) /*!< in: table flags2 */
@@ -71,7 +75,7 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
- dict_tf_validate(flags);
+ ut_a(dict_tf_is_valid(flags));
ut_a(!(flags2 & ~DICT_TF2_BIT_MASK));
heap = mem_heap_create(DICT_HEAP_SIZE);
@@ -115,7 +119,6 @@ dict_mem_table_create(
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
table->fts = fts_create(table);
table->fts->cache = fts_cache_create(table);
- fts_optimize_add_table(table);
} else {
table->fts = NULL;
}
@@ -243,6 +246,156 @@ dict_mem_table_add_col(
dict_mem_fill_column_struct(col, i, mtype, prtype, len);
}
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+static __attribute__((nonnull))
+void
+dict_mem_table_col_rename_low(
+/*==========================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned i, /*!< in: column offset corresponding to s */
+ const char* to, /*!< in: new column name */
+ const char* s) /*!< in: pointer to table->col_names */
+{
+ size_t from_len = strlen(s), to_len = strlen(to);
+
+ ut_ad(i < table->n_def);
+ ut_ad(from_len <= NAME_LEN);
+ ut_ad(to_len <= NAME_LEN);
+
+ if (from_len == to_len) {
+ /* The easy case: simply replace the column name in
+ table->col_names. */
+ strcpy(const_cast<char*>(s), to);
+ } else {
+ /* We need to adjust all affected index->field
+ pointers, as in dict_index_add_col(). First, copy
+ table->col_names. */
+ ulint prefix_len = s - table->col_names;
+
+ for (; i < table->n_def; i++) {
+ s += strlen(s) + 1;
+ }
+
+ ulint full_len = s - table->col_names;
+ char* col_names;
+
+ if (to_len > from_len) {
+ col_names = static_cast<char*>(
+ mem_heap_alloc(
+ table->heap,
+ full_len + to_len - from_len));
+
+ memcpy(col_names, table->col_names, prefix_len);
+ } else {
+ col_names = const_cast<char*>(table->col_names);
+ }
+
+ memcpy(col_names + prefix_len, to, to_len);
+ memmove(col_names + prefix_len + to_len,
+ table->col_names + (prefix_len + from_len),
+ full_len - (prefix_len + from_len));
+
+ /* Replace the field names in every index. */
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ ulint n_fields = dict_index_get_n_fields(index);
+
+ for (ulint i = 0; i < n_fields; i++) {
+ dict_field_t* field
+ = dict_index_get_nth_field(
+ index, i);
+ ulint name_ofs
+ = field->name - table->col_names;
+ if (name_ofs <= prefix_len) {
+ field->name = col_names + name_ofs;
+ } else {
+ ut_a(name_ofs < full_len);
+ field->name = col_names
+ + name_ofs + to_len - from_len;
+ }
+ }
+ }
+
+ table->col_names = col_names;
+ }
+
+ /* Replace the field names in every foreign key constraint. */
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ /* These can point straight to
+ table->col_names, because the foreign key
+ constraints will be freed at the same time
+ when the table object is freed. */
+ foreign->foreign_col_names[f]
+ = dict_index_get_nth_field(
+ foreign->foreign_index, f)->name;
+ }
+ }
+
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ /* foreign->referenced_col_names[] need to be
+ copies, because the constraint may become
+ orphan when foreign_key_checks=0 and the
+ parent table is dropped. */
+
+ const char* col_name = dict_index_get_nth_field(
+ foreign->referenced_index, f)->name;
+
+ if (strcmp(foreign->referenced_col_names[f],
+ col_name)) {
+ char** rc = const_cast<char**>(
+ foreign->referenced_col_names + f);
+ size_t col_name_len_1 = strlen(col_name) + 1;
+
+ if (col_name_len_1 <= strlen(*rc) + 1) {
+ memcpy(*rc, col_name, col_name_len_1);
+ } else {
+ *rc = static_cast<char*>(
+ mem_heap_dup(
+ foreign->heap,
+ col_name,
+ col_name_len_1));
+ }
+ }
+ }
+ }
+}
+
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned nth_col,/*!< in: column index */
+ const char* from, /*!< in: old column name */
+ const char* to) /*!< in: new column name */
+{
+ const char* s = table->col_names;
+
+ ut_ad(nth_col < table->n_def);
+
+ for (unsigned i = 0; i < nth_col; i++) {
+ size_t len = strlen(s);
+ ut_ad(len > 0);
+ s += len + 1;
+ }
+
+ /* This could fail if the data dictionaries are out of sync.
+ Proceed with the renaming anyway. */
+ ut_ad(!strcmp(from, s));
+
+ dict_mem_table_col_rename_low(table, nth_col, to, s);
+}
/**********************************************************************//**
This function populates a dict_col_t memory structure with
@@ -304,6 +457,8 @@ dict_mem_index_create(
dict_mem_fill_index_struct(index, heap, table_name, index_name,
space, type, n_fields);
+ os_fast_mutex_init(zip_pad_mutex_key, &index->zip_pad.mutex);
+
return(index);
}
@@ -436,5 +591,31 @@ dict_mem_index_free(
}
#endif /* UNIV_BLOB_DEBUG */
+ os_fast_mutex_free(&index->zip_pad.mutex);
+
mem_heap_free(index->heap);
}
+
+/*******************************************************************//**
+Create a temporary tablename.
+@return temporary tablename suitable for InnoDB use */
+UNIV_INTERN
+char*
+dict_mem_create_temporary_tablename(
+/*================================*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* dbtab, /*!< in: database/table name */
+ table_id_t id) /*!< in: InnoDB table id */
+{
+ const char* dbend = strchr(dbtab, '/');
+ ut_ad(dbend);
+ size_t dblen = dbend - dbtab + 1;
+ size_t size = tmp_file_prefix_length + 4 + 9 + 9 + dblen;
+
+ char* name = static_cast<char*>(mem_heap_alloc(heap, size));
+ memcpy(name, dbtab, dblen);
+ ut_snprintf(name + dblen, size - dblen,
+ tmp_file_prefix "-ib" UINT64PF, id);
+ return(name);
+}
+
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index eebf6b1ec26..ff7e1ce642c 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2009, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,27 +29,27 @@ Created Jan 06, 2010 Vasil Dimov
#include "btr0btr.h" /* btr_get_size() */
#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
-#include "dict0dict.h" /* dict_table_get_first_index() */
+#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
#include "dict0stats.h"
#include "data0type.h" /* dtype_t */
-#include "db0err.h" /* db_err */
+#include "db0err.h" /* dberr_t */
#include "dyn0dyn.h" /* dyn_array* */
+#include "page0page.h" /* page_align() */
#include "pars0pars.h" /* pars_info_create() */
#include "pars0types.h" /* pars_info_t */
#include "que0que.h" /* que_eval_sql() */
#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
-#include "row0sel.h" /* sel_node_struct */
+#include "row0sel.h" /* sel_node_t */
#include "row0types.h" /* sel_node_t */
#include "trx0trx.h" /* trx_create() */
#include "trx0roll.h" /* trx_rollback_to_savepoint() */
#include "ut0rnd.h" /* ut_rnd_interval() */
-
-#include "ha_prototypes.h" /* innobase_strcasecmp() */
+#include "ut0ut.h" /* ut_format_name(), ut_time() */
/* Sampling algorithm description @{
-The algorithm is controlled by one number - srv_stats_persistent_sample_pages,
+The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
let it be A, which is the number of leaf pages to analyze for a given index
for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
analyzed).
@@ -124,126 +124,34 @@ where n=1..n_uniq.
#define DEBUG_PRINTF(fmt, ...) /* noop */
#endif /* UNIV_STATS_DEBUG */
-/* number of distinct records on a given level that are required to stop
-descending to lower levels and fetch
-srv_stats_persistent_sample_pages records from that level */
-#define N_DIFF_REQUIRED (srv_stats_persistent_sample_pages * 10)
+/* Gets the number of leaf pages to sample in persistent stats estimation */
+#define N_SAMPLE_PAGES(index) \
+ ((index)->table->stats_sample_pages != 0 ? \
+ (index)->table->stats_sample_pages : \
+ srv_stats_persistent_sample_pages)
-/** Open handles on the stats tables. Currently this is used to increase the
-reference count of the stats tables. */
-typedef struct dict_stats_struct {
- dict_table_t* table_stats; /*!< Handle to open TABLE_STATS_NAME */
- dict_table_t* index_stats; /*!< Handle to open INDEX_STATS_NAME */
-} dict_stats_t;
+/* number of distinct records on a given level that are required to stop
+descending to lower levels and fetch N_SAMPLE_PAGES(index) records
+from that level */
+#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
/*********************************************************************//**
-Calculates new estimates for table and index statistics. This function
-is relatively quick and is used to calculate transient statistics that
-are not saved on disk.
-This was the only way to calculate statistics before the
-Persistent Statistics feature was introduced.
-dict_stats_update_transient() @{ */
-static
-void
-dict_stats_update_transient(
-/*========================*/
- dict_table_t* table) /*!< in/out: table */
+Checks whether an index should be ignored in stats manipulations:
+* stats fetch
+* stats recalc
+* stats save
+dict_stats_should_ignore_index() @{
+@return true if exists and all tables are ok */
+UNIV_INLINE
+bool
+dict_stats_should_ignore_index(
+/*===========================*/
+ const dict_index_t* index) /*!< in: index */
{
- dict_index_t* index;
- ulint sum_of_index_sizes = 0;
-
- /* Find out the sizes of the indexes and how many different values
- for the key they approximately have */
-
- index = dict_table_get_first_index(table);
-
- if (index == NULL) {
- /* Table definition is corrupt */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table %s has no indexes. "
- "Cannot calculate statistics.\n", table->name);
- return;
- }
-
- do {
-
- if (index->type & DICT_FTS) {
- index = dict_table_get_next_index(index);
- continue;
- }
-
- if (UNIV_LIKELY
- (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
- || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
- && dict_index_is_clust(index)))) {
- mtr_t mtr;
- ulint size;
-
- mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
-
- if (size != ULINT_UNDEFINED) {
- index->stat_index_size = size;
-
- size = btr_get_size(
- index, BTR_N_LEAF_PAGES, &mtr);
- }
-
- mtr_commit(&mtr);
-
- switch (size) {
- case ULINT_UNDEFINED:
- goto fake_statistics;
- case 0:
- /* The root node of the tree is a leaf */
- size = 1;
- }
-
- sum_of_index_sizes += index->stat_index_size;
-
- index->stat_n_leaf_pages = size;
-
- btr_estimate_number_of_different_key_vals(index);
- } else {
- /* If we have set a high innodb_force_recovery
- level, do not calculate statistics, as a badly
- corrupted index can cause a crash in it.
- Initialize some bogus index cardinality
- statistics, so that the data can be queried in
- various means, also via secondary indexes. */
- ulint i;
-
-fake_statistics:
- sum_of_index_sizes++;
- index->stat_index_size = index->stat_n_leaf_pages = 1;
-
- for (i = dict_index_get_n_unique(index); i; ) {
- index->stat_n_diff_key_vals[i--] = 1;
- }
-
- memset(index->stat_n_non_null_key_vals, 0,
- (1 + dict_index_get_n_unique(index))
- * sizeof(*index->stat_n_non_null_key_vals));
- }
-
- index = dict_table_get_next_index(index);
- } while (index);
-
- index = dict_table_get_first_index(table);
-
- table->stat_n_rows = index->stat_n_diff_key_vals[
- dict_index_get_n_unique(index)];
-
- table->stat_clustered_index_size = index->stat_index_size;
-
- table->stat_sum_of_other_index_sizes = sum_of_index_sizes
- - index->stat_index_size;
-
- table->stat_modified_counter = 0;
-
- table->stat_initialized = TRUE;
+ return((index->type & DICT_FTS)
+ || dict_index_is_corrupted(index)
+ || index->to_be_dropped
+ || *index->name == TEMP_INDEX_PREFIX);
}
/* @} */
@@ -251,24 +159,24 @@ fake_statistics:
Checks whether the persistent statistics storage exists and that all
tables have the proper structure.
dict_stats_persistent_storage_check() @{
-@return TRUE if exists and all tables are ok */
+@return true if exists and all tables are ok */
static
-ibool
+bool
dict_stats_persistent_storage_check(
/*================================*/
- ibool caller_has_dict_sys_mutex) /*!< in: TRUE if the caller
+ bool caller_has_dict_sys_mutex) /*!< in: true if the caller
owns dict_sys->mutex */
{
/* definition for the table TABLE_STATS_NAME */
dict_col_meta_t table_stats_columns[] = {
{"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
- {"last_update", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4},
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
{"n_rows", DATA_INT,
DATA_NOT_NULL | DATA_UNSIGNED, 8},
@@ -282,22 +190,24 @@ dict_stats_persistent_storage_check(
dict_table_schema_t table_stats_schema = {
TABLE_STATS_NAME,
UT_ARR_SIZE(table_stats_columns),
- table_stats_columns
+ table_stats_columns,
+ 0 /* n_foreign */,
+ 0 /* n_referenced */
};
/* definition for the table INDEX_STATS_NAME */
dict_col_meta_t index_stats_columns[] = {
{"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"index_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
- {"last_update", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4},
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
{"stat_name", DATA_VARMYSQL,
DATA_NOT_NULL, 64*3},
@@ -314,11 +224,13 @@ dict_stats_persistent_storage_check(
dict_table_schema_t index_stats_schema = {
INDEX_STATS_NAME,
UT_ARR_SIZE(index_stats_columns),
- index_stats_columns
+ index_stats_columns,
+ 0 /* n_foreign */,
+ 0 /* n_referenced */
};
char errstr[512];
- enum db_err ret;
+ dberr_t ret;
if (!caller_has_dict_sys_mutex) {
mutex_enter(&(dict_sys->mutex));
@@ -339,24 +251,660 @@ dict_stats_persistent_storage_check(
mutex_exit(&(dict_sys->mutex));
}
- if (ret != DB_SUCCESS && ret != DB_TABLE_NOT_FOUND) {
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: %s\n", errstr);
+ return(false);
+ }
+ /* else */
+
+ return(true);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes a given SQL statement using the InnoDB internal SQL parser
+in its own transaction and commits it.
+This function will free the pinfo object.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+dict_stats_exec_sql(
+/*================*/
+ pars_info_t* pinfo, /*!< in/out: pinfo to pass to que_eval_sql()
+ must already have any literals bound to it */
+ const char* sql) /*!< in: SQL string to execute */
+{
+ trx_t* trx;
+ dberr_t err;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (!dict_stats_persistent_storage_check(true)) {
+ pars_info_free(pinfo);
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ trx = trx_allocate_for_background();
+ trx_start_if_not_started(trx);
+
+ err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
+
+ if (err == DB_SUCCESS) {
+ trx_commit_for_mysql(trx);
+ } else {
+ trx->op_info = "rollback of internal trx on stats tables";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->dict_operation_lock_mode = 0;
+ trx->op_info = "";
+ ut_a(trx->error_state == DB_SUCCESS);
+ }
+
+ trx_free_for_background(trx);
+
+ return(err);
+}
+
+/*********************************************************************//**
+Duplicate a table object and its indexes.
+This function creates a dummy dict_table_t object and initializes the
+following table and index members:
+dict_table_t::id (copied)
+dict_table_t::heap (newly created)
+dict_table_t::name (copied)
+dict_table_t::corrupted (copied)
+dict_table_t::indexes<> (newly created)
+dict_table_t::magic_n
+for each entry in dict_table_t::indexes, the following are initialized:
+(indexes that have DICT_FTS set in index->type are skipped)
+dict_index_t::id (copied)
+dict_index_t::name (copied)
+dict_index_t::table_name (points to the copied table name)
+dict_index_t::table (points to the above semi-initialized object)
+dict_index_t::type (copied)
+dict_index_t::to_be_dropped (copied)
+dict_index_t::online_status (copied)
+dict_index_t::n_uniq (copied)
+dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
+dict_index_t::indexes<> (newly created)
+dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
+dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
+dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
+dict_index_t::magic_n
+The returned object should be freed with dict_stats_table_clone_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_table_clone_create(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table whose stats to copy */
+{
+ size_t heap_size;
+ dict_index_t* index;
+
+ /* Estimate the size needed for the table and all of its indexes */
+
+ heap_size = 0;
+ heap_size += sizeof(dict_table_t);
+ heap_size += strlen(table->name) + 1;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ heap_size += sizeof(dict_index_t);
+ heap_size += strlen(index->name) + 1;
+ heap_size += n_uniq * sizeof(index->fields[0]);
+ for (ulint i = 0; i < n_uniq; i++) {
+ heap_size += strlen(index->fields[i].name) + 1;
+ }
+ heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
+ }
+
+ /* Allocate the memory and copy the members */
+
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(heap_size);
+
+ dict_table_t* t;
+
+ t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
+ t->id = table->id;
+
+ t->heap = heap;
+
+ UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
+ t->name = (char*) mem_heap_strdup(heap, table->name);
+
+ t->corrupted = table->corrupted;
+
+ UT_LIST_INIT(t->indexes);
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ dict_index_t* idx;
+
+ idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
+ idx->id = index->id;
+
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
+ idx->name = (char*) mem_heap_strdup(heap, index->name);
+
+ idx->table_name = t->name;
+
+ idx->table = t;
+
+ idx->type = index->type;
+
+ idx->to_be_dropped = 0;
+
+ idx->online_status = ONLINE_INDEX_COMPLETE;
+
+ idx->n_uniq = index->n_uniq;
+
+ idx->fields = (dict_field_t*) mem_heap_alloc(
+ heap, idx->n_uniq * sizeof(idx->fields[0]));
+
+ for (ulint i = 0; i < idx->n_uniq; i++) {
+ UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
+ idx->fields[i].name = (char*) mem_heap_strdup(
+ heap, index->fields[i].name);
+ }
+
+ /* hook idx into t->indexes */
+ UT_LIST_ADD_LAST(indexes, t->indexes, idx);
+
+ idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
+
+ idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
+
+ idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
+ ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
+ }
+
+ ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
+
+ return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_table_clone_create().
+dict_stats_table_clone_free() @{ */
+static
+void
+dict_stats_table_clone_free(
+/*========================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ mem_heap_free(t->heap);
+}
+/* @} */
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into an index
+statistics members. The resulting stats correspond to an empty index.
+The caller must own index's table stats latch in X mode
+(dict_table_stats_lock(table, RW_X_LATCH))
+dict_stats_empty_index() @{ */
+static
+void
+dict_stats_empty_index(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ ut_ad(!(index->type & DICT_FTS));
+ ut_ad(!dict_index_is_univ(index));
+
+ ulint n_uniq = index->n_uniq;
+
+ for (ulint i = 0; i < n_uniq; i++) {
+ index->stat_n_diff_key_vals[i] = 0;
+ index->stat_n_sample_sizes[i] = 1;
+ index->stat_n_non_null_key_vals[i] = 0;
+ }
+
+ index->stat_index_size = 1;
+ index->stat_n_leaf_pages = 1;
+}
+/* @} */
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into a table and its indexes'
+statistics members. The resulting stats correspond to an empty table.
+dict_stats_empty_table() @{ */
+static
+void
+dict_stats_empty_table(
+/*===================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ /* Zero the stats members */
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ table->stat_n_rows = 0;
+ table->stat_clustered_index_size = 1;
+ /* 1 page for each index, not counting the clustered */
+ table->stat_sum_of_other_index_sizes
+ = UT_LIST_GET_LEN(table->indexes) - 1;
+ table->stat_modified_counter = 0;
+
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ dict_stats_empty_index(index);
+ }
+
+ table->stat_initialized = TRUE;
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether index's stats are initialized (assert if they are not). */
+static
+void
+dict_stats_assert_initialized_index(
+/*================================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_diff_key_vals,
+ index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_sample_sizes,
+ index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_non_null_key_vals,
+ index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+}
+/*********************************************************************//**
+Check whether table's stats are initialized (assert if they are not). */
+static
+void
+dict_stats_assert_initialized(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_a(table->stat_initialized);
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
+ sizeof(table->stats_last_recalc));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
+ sizeof(table->stat_persistent));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
+ sizeof(table->stats_auto_recalc));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
+ sizeof(table->stats_sample_pages));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
+ sizeof(table->stats_bg_flag));
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (!dict_stats_should_ignore_index(index)) {
+ dict_stats_assert_initialized_index(index);
+ }
+ }
+}
+
+#define INDEX_EQ(i1, i2) \
+ ((i1) != NULL \
+ && (i2) != NULL \
+ && (i1)->id == (i2)->id \
+ && strcmp((i1)->name, (i2)->name) == 0)
+/*********************************************************************//**
+Copy table and index statistics from one table to another, including index
+stats. Extra indexes in src are ignored and extra indexes in dst are
+initialized to correspond to an empty index. */
+static
+void
+dict_stats_copy(
+/*============*/
+ dict_table_t* dst, /*!< in/out: destination table */
+ const dict_table_t* src) /*!< in: source table */
+{
+ dst->stats_last_recalc = src->stats_last_recalc;
+ dst->stat_n_rows = src->stat_n_rows;
+ dst->stat_clustered_index_size = src->stat_clustered_index_size;
+ dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
+ dst->stat_modified_counter = src->stat_modified_counter;
+
+ dict_index_t* dst_idx;
+ dict_index_t* src_idx;
+
+ for (dst_idx = dict_table_get_first_index(dst),
+ src_idx = dict_table_get_first_index(src);
+ dst_idx != NULL;
+ dst_idx = dict_table_get_next_index(dst_idx),
+ (src_idx != NULL
+ && (src_idx = dict_table_get_next_index(src_idx)))) {
+
+ if (dict_stats_should_ignore_index(dst_idx)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(dst_idx));
+
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ for (src_idx = dict_table_get_first_index(src);
+ src_idx != NULL;
+ src_idx = dict_table_get_next_index(src_idx)) {
+
+ if (INDEX_EQ(src_idx, dst_idx)) {
+ break;
+ }
+ }
+ }
+
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ dict_stats_empty_index(dst_idx);
+ continue;
+ }
+
+ ulint n_copy_el;
+
+ if (dst_idx->n_uniq > src_idx->n_uniq) {
+ n_copy_el = src_idx->n_uniq;
+ /* Since src is smaller some elements in dst
+ will remain untouched by the following memmove(),
+ thus we init all of them here. */
+ dict_stats_empty_index(dst_idx);
+ } else {
+ n_copy_el = dst_idx->n_uniq;
+ }
+
+ memmove(dst_idx->stat_n_diff_key_vals,
+ src_idx->stat_n_diff_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));
+
+ memmove(dst_idx->stat_n_sample_sizes,
+ src_idx->stat_n_sample_sizes,
+ n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));
+
+ memmove(dst_idx->stat_n_non_null_key_vals,
+ src_idx->stat_n_non_null_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));
+
+ dst_idx->stat_index_size = src_idx->stat_index_size;
+
+ dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
+ }
+
+ dst->stat_initialized = TRUE;
+}
+
+/*********************************************************************//**
+Duplicate the stats of a table and its indexes.
+This function creates a dummy dict_table_t object and copies the input
+table's stats into it. The returned table object is not in the dictionary
+cache and cannot be accessed by any other threads. In addition to the
+members copied in dict_stats_table_clone_create() this function initializes
+the following:
+dict_table_t::stat_initialized
+dict_table_t::stat_persistent
+dict_table_t::stat_n_rows
+dict_table_t::stat_clustered_index_size
+dict_table_t::stat_sum_of_other_index_sizes
+dict_table_t::stat_modified_counter
+dict_index_t::stat_n_diff_key_vals[]
+dict_index_t::stat_n_sample_sizes[]
+dict_index_t::stat_n_non_null_key_vals[]
+dict_index_t::stat_index_size
+dict_index_t::stat_n_leaf_pages
+The returned object should be freed with dict_stats_snapshot_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_snapshot_create(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table whose stats to copy */
+{
+ mutex_enter(&dict_sys->mutex);
+
+ dict_table_stats_lock(table, RW_S_LATCH);
+
+ dict_stats_assert_initialized(table);
+
+ dict_table_t* t;
+
+ t = dict_stats_table_clone_create(table);
+
+ dict_stats_copy(t, table);
+
+ t->stat_persistent = table->stat_persistent;
+ t->stats_auto_recalc = table->stats_auto_recalc;
+ t->stats_sample_pages = table->stats_sample_pages;
+ t->stats_bg_flag = table->stats_bg_flag;
+
+ dict_table_stats_unlock(table, RW_S_LATCH);
+
+ mutex_exit(&dict_sys->mutex);
+
+ return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_snapshot_create().
+dict_stats_snapshot_free() @{ */
+static
+void
+dict_stats_snapshot_free(
+/*=====================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ dict_stats_table_clone_free(t);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for index statistics. This function is
+relatively quick and is used to calculate transient statistics that
+are not saved on disk. This was the only way to calculate statistics
+before the Persistent Statistics feature was introduced.
+dict_stats_update_transient_for_index() @{ */
+static
+void
+dict_stats_update_transient_for_index(
+/*==================================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ if (UNIV_LIKELY
+ (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ mtr_t mtr;
+ ulint size;
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+
+ size = btr_get_size(
+ index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ dict_stats_empty_index(index);
+ return;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
+ } else {
+ /* If we have set a high innodb_force_recovery
+ level, do not calculate statistics, as a badly
+ corrupted index can cause a crash in it.
+ Initialize some bogus index cardinality
+ statistics, so that the data can be queried in
+ various means, also via secondary indexes. */
+ dict_stats_empty_index(index);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced.
+dict_stats_update_transient() @{ */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+ ulint sum_of_index_sizes = 0;
+
+ /* Find out the sizes of the indexes and how many different values
+ for the key they approximately have */
+
+ index = dict_table_get_first_index(table);
+
+ if (dict_table_is_discarded(table)) {
+ /* Nothing to do. */
+ dict_stats_empty_table(table);
+ return;
+ } else if (index == NULL) {
+ /* Table definition is corrupt */
+
+ char buf[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
+ fprintf(stderr, " InnoDB: table %s has no indexes. "
+ "Cannot calculate statistics.\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+ dict_stats_empty_table(table);
+ return;
+ }
+
+ for (; index != NULL; index = dict_table_get_next_index(index)) {
+
+ ut_ad(!dict_index_is_univ(index));
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ dict_stats_empty_index(index);
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ dict_stats_update_transient_for_index(index);
+
+ sum_of_index_sizes += index->stat_index_size;
}
- /* We return silently if some of the tables are not present because
- this code is executed during open table. By design we check if the
- persistent statistics storage is present and whether there are stats
- for the table being opened and if so, then we use them, otherwise we
- silently switch back to using the transient stats. */
- return(ret == DB_SUCCESS);
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index) - 1];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+ - index->stat_index_size;
+
+ table->stats_last_recalc = ut_time();
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
}
/* @} */
/* @{ Pseudo code about the relation between the following functions
-let N = srv_stats_persistent_sample_pages
+let N = N_SAMPLE_PAGES(index)
dict_stats_analyze_index()
for each n_prefix
@@ -375,14 +923,11 @@ dict_stats_analyze_index()
/*********************************************************************//**
Find the total number and the number of distinct keys on a given level in
an index. Each of the 1..n_uniq prefixes are looked up and the results are
-saved in the array n_diff[]. Notice that n_diff[] must be able to store
-n_uniq+1 numbers because the results are saved in
-n_diff[1] .. n_diff[n_uniq]. The total number of records on the level is
-saved in total_recs.
+saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
+records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
-in n_diff_boundaries[1..n_uniq], records indexing starts from the leftmost
-record on the level and continues cross pages boundaries, counting from 0.
-dict_stats_analyze_index_level() @{ */
+in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
+record on the level and continues cross pages boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
@@ -393,78 +938,87 @@ dict_stats_analyze_index_level(
distinct keys for all prefixes */
ib_uint64_t* total_recs, /*!< out: total number of records */
ib_uint64_t* total_pages, /*!< out: total number of pages */
- dyn_array_t* n_diff_boundaries)/*!< out: boundaries of the groups
+ dyn_array_t* n_diff_boundaries,/*!< out: boundaries of the groups
of distinct keys */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint n_uniq;
mem_heap_t* heap;
- dtuple_t* dtuple;
btr_pcur_t pcur;
- mtr_t mtr;
const page_t* page;
const rec_t* rec;
const rec_t* prev_rec;
+ bool prev_rec_is_copied;
byte* prev_rec_buf = NULL;
ulint prev_rec_buf_size = 0;
+ ulint* rec_offsets;
+ ulint* prev_rec_offsets;
ulint i;
DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
index->table->name, index->name, level);
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
n_uniq = dict_index_get_n_unique(index);
- /* elements in the n_diff array are 1..n_uniq (inclusive) */
- memset(n_diff, 0x0, (n_uniq + 1) * sizeof(*n_diff));
+ /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
+ memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
+
+ /* Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
- heap = mem_heap_create(256);
+ heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
+ rec_offsets = static_cast<ulint*>(
+ mem_heap_alloc(heap, i * sizeof *rec_offsets));
+ prev_rec_offsets = static_cast<ulint*>(
+ mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
+ rec_offs_set_n_alloc(rec_offsets, i);
+ rec_offs_set_n_alloc(prev_rec_offsets, i);
- /* reset the dynamic arrays n_diff_boundaries[1..n_uniq];
- n_diff_boundaries[0] is ignored to follow the same convention
- as n_diff[] */
+ /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
if (n_diff_boundaries != NULL) {
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
dyn_array_free(&n_diff_boundaries[i]);
dyn_array_create(&n_diff_boundaries[i]);
}
}
- /* craft a record that is always smaller than the others,
- this way we are sure that the cursor pcur will be positioned
- on the leftmost record on the leftmost page on the desired level */
- dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
- dict_table_copy_types(dtuple, index->table);
- dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
-
- mtr_start(&mtr);
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
- btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &pcur, __FILE__, __LINE__, &mtr);
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ &pcur, true, level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
page = btr_pcur_get_page(&pcur);
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+ ut_ad(btr_pcur_get_rec(&pcur)
+ == page_rec_get_next_const(page_get_infimum_rec(page)));
+
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, &mtr) == level);
+ ut_a(btr_page_get_level(page, mtr) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
/* check whether the first record on the leftmost page is marked
as such, if we are on a non-leaf level */
- ut_a(level == 0
- || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next_const(page_get_infimum_rec(page)),
- page_is_comp(page))));
-
- if (btr_pcur_is_before_first_on_page(&pcur)) {
- btr_pcur_move_to_next_on_page(&pcur);
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- btr_pcur_move_to_prev_on_page(&pcur);
- }
+ ut_a((level == 0)
+ == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))));
prev_rec = NULL;
+ prev_rec_is_copied = false;
/* no records by default */
*total_recs = 0;
@@ -476,56 +1030,83 @@ dict_stats_analyze_index_level(
X and the fist on page X+1 */
for (;
btr_pcur_is_on_user_rec(&pcur);
- btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+ btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
ulint matched_fields = 0;
ulint matched_bytes = 0;
- ulint offsets_rec_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_rec;
-
- rec_offs_init(offsets_rec_onstack);
+ bool rec_is_last_on_page;
rec = btr_pcur_get_rec(&pcur);
+ /* If rec and prev_rec are on different pages, then prev_rec
+ must have been copied, because we hold latch only on the page
+ where rec resides. */
+ if (prev_rec != NULL
+ && page_align(rec) != page_align(prev_rec)) {
+
+ ut_a(prev_rec_is_copied);
+ }
+
+ rec_is_last_on_page =
+ page_rec_is_supremum(page_rec_get_next_const(rec));
+
/* increment the pages counter at the end of each page */
- if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ if (rec_is_last_on_page) {
(*total_pages)++;
}
- /* skip delete-marked records */
- if (rec_get_deleted_flag(rec, page_is_comp(
- btr_pcur_get_page(&pcur)))) {
+ /* Skip delete-marked records on the leaf level. If we
+ do not skip them, then ANALYZE quickly after DELETE
+ could count them or not (purge may have already wiped
+ them away) which brings non-determinism. We skip only
+ leaf-level delete marks because delete marks on
+ non-leaf level do not make sense. */
+ if (level == 0 &&
+ rec_get_deleted_flag(
+ rec,
+ page_is_comp(btr_pcur_get_page(&pcur)))) {
+
+ if (rec_is_last_on_page
+ && !prev_rec_is_copied
+ && prev_rec != NULL) {
+ /* copy prev_rec */
+
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
+ n_uniq, &heap);
+
+ prev_rec = rec_copy_prefix_to_buf(
+ prev_rec, index,
+ rec_offs_n_fields(prev_rec_offsets),
+ &prev_rec_buf, &prev_rec_buf_size);
+
+ prev_rec_is_copied = true;
+ }
continue;
}
- offsets_rec = rec_get_offsets(rec, index, offsets_rec_onstack,
- n_uniq, &heap);
+ rec_offsets = rec_get_offsets(
+ rec, index, rec_offsets, n_uniq, &heap);
(*total_recs)++;
if (prev_rec != NULL) {
-
- ulint offsets_prev_rec_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_prev_rec;
-
- rec_offs_init(offsets_prev_rec_onstack);
-
- offsets_prev_rec = rec_get_offsets(
- prev_rec, index, offsets_prev_rec_onstack,
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
n_uniq, &heap);
cmp_rec_rec_with_match(rec,
prev_rec,
- offsets_rec,
- offsets_prev_rec,
+ rec_offsets,
+ prev_rec_offsets,
index,
FALSE,
&matched_fields,
&matched_bytes);
- for (i = matched_fields + 1; i <= n_uniq; i++) {
+ for (i = matched_fields; i < n_uniq; i++) {
if (n_diff_boundaries != NULL) {
/* push the index of the previous
@@ -553,17 +1134,18 @@ dict_stats_analyze_index_level(
}
/* increment the number of different keys
- for n_prefix=i */
+ for n_prefix=i+1 (e.g. if i=0 then we increment
+ for n_prefix=1 which is stored in n_diff[0]) */
n_diff[i]++;
}
} else {
/* this is the first non-delete marked record */
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
n_diff[i] = 1;
}
}
- if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ if (rec_is_last_on_page) {
/* end of a page has been reached */
/* we need to copy the record instead of assigning
@@ -574,8 +1156,9 @@ dict_stats_analyze_index_level(
btr_pcur_move_to_next_user_rec() will release the
latch on the page that prev_rec is on */
prev_rec = rec_copy_prefix_to_buf(
- rec, index, rec_offs_n_fields(offsets_rec),
+ rec, index, rec_offs_n_fields(rec_offsets),
&prev_rec_buf, &prev_rec_buf_size);
+ prev_rec_is_copied = true;
} else {
/* still on the same page, the next call to
@@ -584,12 +1167,14 @@ dict_stats_analyze_index_level(
instead of copying the records like above */
prev_rec = rec;
+ prev_rec_is_copied = false;
}
}
/* if *total_pages is left untouched then the above loop was not
entered at all and there is one page in the whole tree which is
- empty */
+ empty or the loop was entered but this is level 0, contains one page
+ and all records are delete-marked */
if (*total_pages == 0) {
ut_ad(level == 0);
@@ -605,7 +1190,7 @@ dict_stats_analyze_index_level(
/* remember the index of the last record on the level as the
last one from the last group of equal keys; this holds for
all possible prefixes */
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
void* p;
ib_uint64_t idx;
@@ -619,10 +1204,10 @@ dict_stats_analyze_index_level(
}
/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
- for i=1..n_uniq */
+ for i=0..n_uniq-1 */
#ifdef UNIV_STATS_DEBUG
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
DEBUG_PRINTF(" %s(): total recs: " UINT64PF
", total pages: " UINT64PF
@@ -654,9 +1239,11 @@ dict_stats_analyze_index_level(
}
#endif /* UNIV_STATS_DEBUG */
- btr_pcur_close(&pcur);
+ /* Release the latch on the last page, because that is not done by
+ btr_pcur_close(). This function works also for non-leaf pages. */
+ btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
- mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
if (prev_rec_buf != NULL) {
@@ -665,15 +1252,16 @@ dict_stats_analyze_index_level(
mem_heap_free(heap);
}
-/* @} */
/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
-typedef enum page_scan_method_enum {
- COUNT_ALL_NON_BORING, /* scan all records on the given page
- and count the number of distinct ones */
+enum page_scan_method_t {
+ COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
+ the given page and count the number of
+ distinct ones, also ignore delete marked
+ records */
QUIT_ON_FIRST_NON_BORING/* quit when the first record that differs
from its right neighbor is found */
-} page_scan_method_t;
+};
/* @} */
/*********************************************************************//**
@@ -715,11 +1303,18 @@ dict_stats_scan_page(
Because offsets1,offsets2 should be big enough,
this memory heap should never be used. */
mem_heap_t* heap = NULL;
+ const rec_t* (*get_next)(const rec_t*);
- rec = page_rec_get_next_const(page_get_infimum_rec(page));
+ if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
+ get_next = page_rec_get_next_non_del_marked;
+ } else {
+ get_next = page_rec_get_next_const;
+ }
+
+ rec = get_next(page_get_infimum_rec(page));
if (page_rec_is_supremum(rec)) {
- /* the page is empty */
+ /* the page is empty or contains only delete-marked records */
*n_diff = 0;
*out_rec = NULL;
return(NULL);
@@ -728,7 +1323,7 @@ dict_stats_scan_page(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
- next_rec = page_rec_get_next_const(rec);
+ next_rec = get_next(rec);
*n_diff = 1;
@@ -777,7 +1372,8 @@ dict_stats_scan_page(
offsets_rec = offsets_next_rec;
offsets_next_rec = offsets_tmp;
}
- next_rec = page_rec_get_next_const(next_rec);
+
+ next_rec = get_next(next_rec);
}
func_exit:
@@ -814,7 +1410,6 @@ dict_stats_analyze_index_below_cur(
ulint* offsets1;
ulint* offsets2;
ulint* offsets_rec;
- ulint root_height;
ib_uint64_t n_diff; /* the result */
ulint size;
@@ -841,8 +1436,6 @@ dict_stats_analyze_index_below_cur(
rec_offs_set_n_alloc(offsets1, size);
rec_offs_set_n_alloc(offsets2, size);
- root_height = btr_page_get_level(btr_root_get(index, mtr), mtr);
-
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
@@ -907,14 +1500,7 @@ dict_stats_analyze_index_below_cur(
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- COUNT_ALL_NON_BORING, &n_diff);
-
- if (root_height > 0) {
-
- /* empty pages are allowed only if the whole B-tree is empty
- and contains a single empty page */
- ut_a(offsets_rec != NULL);
- }
+ COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, &n_diff);
#if 0
DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
@@ -928,42 +1514,40 @@ dict_stats_analyze_index_below_cur(
/* @} */
/*********************************************************************//**
-For a given level in an index select srv_stats_persistent_sample_pages
+For a given level in an index select N_SAMPLE_PAGES(index)
(or less) records from that level and dive below them to the corresponding
leaf pages, then scan those leaf pages and save the sampling results in
-index->stat_n_diff_key_vals[n_prefix] and the number of pages scanned in
-index->stat_n_sample_sizes[n_prefix].
-dict_stats_analyze_index_for_n_prefix() @{ */
+index->stat_n_diff_key_vals[n_prefix - 1] and the number of pages scanned in
+index->stat_n_sample_sizes[n_prefix - 1]. */
static
void
dict_stats_analyze_index_for_n_prefix(
/*==================================*/
- dict_index_t* index, /*!< in/out: index */
- ulint level, /*!< in: level,
- must be >= 1 */
- ib_uint64_t total_recs_on_level, /*!< in: total number of
- records on the given level */
- ulint n_prefix, /*!< in: look at first
- n_prefix columns when
- comparing records */
- ib_uint64_t n_diff_for_this_prefix, /*!< in: number of distinct
- records on the given level,
- when looking at the first
- n_prefix columns */
- dyn_array_t* boundaries) /*!< in: array that contains
- n_diff_for_this_prefix
- integers each of which
- represents the index (on the
- level, counting from
- left/smallest to right/biggest
- from 0) of the last record
- from each group of distinct
- keys */
+ dict_index_t* index, /*!< in/out: index */
+ ulint level, /*!< in: level, must be >= 1 */
+ ib_uint64_t total_recs_on_level,
+ /*!< in: total number of
+ records on the given level */
+ ulint n_prefix, /*!< in: look at first
+ n_prefix columns when
+ comparing records */
+ ib_uint64_t n_diff_for_this_prefix,
+ /*!< in: number of distinct
+ records on the given level,
+ when looking at the first
+ n_prefix columns */
+ dyn_array_t* boundaries, /*!< in: array that contains
+ n_diff_for_this_prefix
+ integers each of which
+ represents the index (on the
+ level, counting from
+ left/smallest to right/biggest
+ from 0) of the last record
+ from each group of distinct
+ keys */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- mem_heap_t* heap;
- dtuple_t* dtuple;
btr_pcur_t pcur;
- mtr_t mtr;
const page_t* page;
ib_uint64_t rec_idx;
ib_uint64_t last_idx_on_level;
@@ -978,51 +1562,45 @@ dict_stats_analyze_index_for_n_prefix(
n_prefix, n_diff_for_this_prefix);
#endif
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
/* if some of those is 0 then this means that there is exactly one
page in the B-tree and it is empty and we should have done full scan
and should not be here */
ut_ad(total_recs_on_level > 0);
ut_ad(n_diff_for_this_prefix > 0);
- /* this is configured to be min 1, someone has changed the code */
- ut_ad(srv_stats_persistent_sample_pages > 0);
+ /* this must be at least 1 */
+ ut_ad(N_SAMPLE_PAGES(index) > 0);
- heap = mem_heap_create(256);
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
- /* craft a record that is always smaller than the others,
- this way we are sure that the cursor pcur will be positioned
- on the leftmost record on the leftmost page on the desired level */
- dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
- dict_table_copy_types(dtuple, index->table);
- dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
-
- mtr_start(&mtr);
-
- btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &pcur, __FILE__, __LINE__, &mtr);
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ &pcur, true, level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
page = btr_pcur_get_page(&pcur);
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+ ut_ad(btr_pcur_get_rec(&pcur)
+ == page_rec_get_next_const(page_get_infimum_rec(page)));
+
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, &mtr) == level);
+ ut_a(btr_page_get_level(page, mtr) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
/* check whether the first record on the leftmost page is marked
as such, if we are on a non-leaf level */
- ut_a(level == 0 || REC_INFO_MIN_REC_FLAG
- & rec_get_info_bits(page_rec_get_next_const(
- page_get_infimum_rec(page)),
- page_is_comp(page)));
-
- if (btr_pcur_is_before_first_on_page(&pcur)) {
- btr_pcur_move_to_next_on_page(&pcur);
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- btr_pcur_move_to_prev_on_page(&pcur);
- }
+ ut_a((level == 0)
+ == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))));
last_idx_on_level = *(ib_uint64_t*) dyn_array_get_element(boundaries,
(ulint) ((n_diff_for_this_prefix - 1) * sizeof(ib_uint64_t)));
@@ -1031,7 +1609,7 @@ dict_stats_analyze_index_for_n_prefix(
n_diff_sum_of_all_analyzed_pages = 0;
- n_recs_to_dive_below = ut_min(srv_stats_persistent_sample_pages,
+ n_recs_to_dive_below = ut_min(N_SAMPLE_PAGES(index),
n_diff_for_this_prefix);
for (i = 0; i < n_recs_to_dive_below; i++) {
@@ -1093,7 +1671,7 @@ dict_stats_analyze_index_for_n_prefix(
while (rec_idx < dive_below_idx
&& btr_pcur_is_on_user_rec(&pcur)) {
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ btr_pcur_move_to_next_user_rec(&pcur, mtr);
rec_idx++;
}
@@ -1107,12 +1685,20 @@ dict_stats_analyze_index_for_n_prefix(
break;
}
+ /* it could be that the tree has changed in such a way that
+ the record under dive_below_idx is the supremum record, in
+ this case rec_idx == dive_below_idx and pcur is positioned
+ on the supremum, we do not want to dive below it */
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
ut_a(rec_idx == dive_below_idx);
ib_uint64_t n_diff_on_leaf_page;
n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
- btr_pcur_get_btr_cur(&pcur), n_prefix, &mtr);
+ btr_pcur_get_btr_cur(&pcur), n_prefix, mtr);
/* We adjust n_diff_on_leaf_page here to avoid counting
one record twice - once as the last on some page and once
@@ -1135,12 +1721,13 @@ dict_stats_analyze_index_for_n_prefix(
n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
}
- if (n_diff_sum_of_all_analyzed_pages == 0) {
- n_diff_sum_of_all_analyzed_pages = 1;
- }
+ /* n_diff_sum_of_all_analyzed_pages can be 0 here if all the leaf
+ pages sampled contained only delete-marked records. In this case
+ we should assign 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
+ the formula below does. */
/* See REF01 for an explanation of the algorithm */
- index->stat_n_diff_key_vals[n_prefix]
+ index->stat_n_diff_key_vals[n_prefix - 1]
= index->stat_n_leaf_pages
* n_diff_for_this_prefix
@@ -1149,31 +1736,25 @@ dict_stats_analyze_index_for_n_prefix(
* n_diff_sum_of_all_analyzed_pages
/ n_recs_to_dive_below;
- index->stat_n_sample_sizes[n_prefix] = n_recs_to_dive_below;
+ index->stat_n_sample_sizes[n_prefix - 1] = n_recs_to_dive_below;
DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu "
"(%lu"
" * " UINT64PF " / " UINT64PF
" * " UINT64PF " / " UINT64PF ")\n",
- __func__, index->stat_n_diff_key_vals[n_prefix],
+ __func__, index->stat_n_diff_key_vals[n_prefix - 1],
n_prefix,
index->stat_n_leaf_pages,
n_diff_for_this_prefix, total_recs_on_level,
n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
}
-/* @} */
/*********************************************************************//**
Calculates new statistics for a given index and saves them to the index
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
-stat_n_leaf_pages. This function could be slow.
-dict_stats_analyze_index() @{ */
+stat_n_leaf_pages. This function could be slow. */
static
void
dict_stats_analyze_index(
@@ -1182,7 +1763,7 @@ dict_stats_analyze_index(
{
ulint root_level;
ulint level;
- ibool level_is_analyzed;
+ bool level_is_analyzed;
ulint n_uniq;
ulint n_prefix;
ib_uint64_t* n_diff_on_level;
@@ -1191,10 +1772,11 @@ dict_stats_analyze_index(
dyn_array_t* n_diff_boundaries;
mtr_t mtr;
ulint size;
- ulint i;
DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
+ dict_stats_empty_index(index);
+
mtr_start(&mtr);
mtr_s_lock(dict_index_get_lock(index), &mtr);
@@ -1206,19 +1788,12 @@ dict_stats_analyze_index(
size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
}
+ /* Release the X locks on the root page taken by btr_get_size() */
+ mtr_commit(&mtr);
+
switch (size) {
case ULINT_UNDEFINED:
- mtr_commit(&mtr);
- /* Fake some statistics. */
- index->stat_index_size = index->stat_n_leaf_pages = 1;
-
- for (i = dict_index_get_n_unique(index); i; ) {
- index->stat_n_diff_key_vals[i--] = 1;
- }
-
- memset(index->stat_n_non_null_key_vals, 0,
- (1 + dict_index_get_n_unique(index))
- * sizeof(*index->stat_n_non_null_key_vals));
+ dict_stats_assert_initialized_index(index);
return;
case 0:
/* The root node of the tree is a leaf */
@@ -1227,23 +1802,25 @@ dict_stats_analyze_index(
index->stat_n_leaf_pages = size;
- root_level = btr_page_get_level(btr_root_get(index, &mtr), &mtr);
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
- mtr_commit(&mtr);
+ root_level = btr_height_get(index, &mtr);
n_uniq = dict_index_get_n_unique(index);
- /* if the tree has just one level (and one page) or if the user
- has requested to sample too many pages then do full scan */
+ /* If the tree has just one level (and one page) or if the user
+ has requested to sample too many pages then do full scan.
+
+ For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
+ will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
+ pages will be sampled. If that number is bigger than the total
+ number of leaf pages then do full scan of the leaf level instead
+ since it will be faster and will give better results. */
+
if (root_level == 0
- /* for each n-column prefix (for n=1..n_uniq)
- srv_stats_persistent_sample_pages will be sampled, so in total
- srv_stats_persistent_sample_pages * n_uniq leaf pages will be
- sampled. If that number is bigger than the total number of leaf
- pages then do full scan of the leaf level instead since it will
- be faster and will give better results. */
- || srv_stats_persistent_sample_pages * n_uniq
- > index->stat_n_leaf_pages) {
+ || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
if (root_level == 0) {
DEBUG_PRINTF(" %s(): just one page, "
@@ -1261,27 +1838,28 @@ dict_stats_analyze_index(
index->stat_n_diff_key_vals,
&total_recs,
&total_pages,
- NULL /*boundaries not needed*/);
+ NULL /* boundaries not needed */,
+ &mtr);
- for (i = 1; i <= n_uniq; i++) {
+ for (ulint i = 0; i < n_uniq; i++) {
index->stat_n_sample_sizes[i] = total_pages;
}
+ mtr_commit(&mtr);
+
+ dict_stats_assert_initialized_index(index);
return;
}
- /* else */
/* set to zero */
- n_diff_on_level = (ib_uint64_t*) mem_zalloc((n_uniq + 1)
- * sizeof(ib_uint64_t));
+ n_diff_on_level = reinterpret_cast<ib_uint64_t*>
+ (mem_zalloc(n_uniq * sizeof(ib_uint64_t)));
- n_diff_boundaries = (dyn_array_t*) mem_alloc((n_uniq + 1)
- * sizeof(dyn_array_t));
+ n_diff_boundaries = reinterpret_cast<dyn_array_t*>
+ (mem_alloc(n_uniq * sizeof(dyn_array_t)));
- for (i = 1; i <= n_uniq; i++) {
- /* initialize the dynamic arrays, the first one
- (index=0) is ignored to follow the same indexing
- scheme as n_diff_on_level[] */
+ for (ulint i = 0; i < n_uniq; i++) {
+ /* initialize the dynamic arrays */
dyn_array_create(&n_diff_boundaries[i]);
}
@@ -1299,25 +1877,42 @@ dict_stats_analyze_index(
So if we find that the first level containing D distinct
keys (on n_prefix columns) is L, we continue from L when
searching for D distinct keys on n_prefix-1 columns. */
- level = (long) root_level;
- level_is_analyzed = FALSE;
+ level = root_level;
+ level_is_analyzed = false;
+
for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
DEBUG_PRINTF(" %s(): searching level with >=%llu "
"distinct records, n_prefix=%lu\n",
- __func__, N_DIFF_REQUIRED, n_prefix);
+ __func__, N_DIFF_REQUIRED(index), n_prefix);
+
+ /* Commit the mtr to release the tree S lock to allow
+ other threads to do some work too. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ if (root_level != btr_height_get(index, &mtr)) {
+ /* Just quit if the tree has changed beyond
+ recognition here. The old stats from previous
+ runs will remain in the values that we have
+ not calculated yet. Initially when the index
+ object is created the stats members are given
+ some sensible values so leaving them untouched
+ here even the first time will not cause us to
+ read uninitialized memory later. */
+ break;
+ }
/* check whether we should pick the current level;
we pick level 1 even if it does not have enough
distinct records because we do not want to scan the
leaf level because it may contain too many records */
if (level_is_analyzed
- && (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
|| level == 1)) {
goto found_level;
}
- /* else */
/* search for a level that contains enough distinct records */
@@ -1325,12 +1920,14 @@ dict_stats_analyze_index(
/* if this does not hold we should be on
"found_level" instead of here */
- ut_ad(n_diff_on_level[n_prefix] < N_DIFF_REQUIRED);
+ ut_ad(n_diff_on_level[n_prefix - 1]
+ < N_DIFF_REQUIRED(index));
level--;
- level_is_analyzed = FALSE;
+ level_is_analyzed = false;
}
+ /* descend into the tree, searching for "good enough" level */
for (;;) {
/* make sure we do not scan the leaf level
@@ -1349,18 +1946,19 @@ dict_stats_analyze_index(
total_recs is left from the previous iteration when
we scanned one level upper or we have not scanned any
levels yet in which case total_recs is 1. */
- if (total_recs > srv_stats_persistent_sample_pages) {
+ if (total_recs > N_SAMPLE_PAGES(index)) {
- /* if the above cond is true then we are not
- at the root level since on the root level
- total_recs == 1 and cannot
- be > srv_stats_persistent_sample_pages */
+ /* if the above cond is true then we are
+ not at the root level since on the root
+ level total_recs == 1 (set before we
+ enter the n-prefix loop) and cannot
+ be > N_SAMPLE_PAGES(index) */
ut_a(level != root_level);
/* step one level back and be satisfied with
whatever it contains */
level++;
- level_is_analyzed = TRUE;
+ level_is_analyzed = true;
break;
}
@@ -1370,27 +1968,28 @@ dict_stats_analyze_index(
n_diff_on_level,
&total_recs,
&total_pages,
- n_diff_boundaries);
+ n_diff_boundaries,
+ &mtr);
- level_is_analyzed = TRUE;
+ level_is_analyzed = true;
- if (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ if (n_diff_on_level[n_prefix - 1]
+ >= N_DIFF_REQUIRED(index)
|| level == 1) {
/* we found a good level with many distinct
records or we have reached the last level we
could scan */
break;
}
- /* else */
level--;
- level_is_analyzed = FALSE;
+ level_is_analyzed = false;
}
found_level:
DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
" distinct records for n_prefix=%lu\n",
- __func__, level, n_diff_on_level[n_prefix],
+ __func__, level, n_diff_on_level[n_prefix - 1],
n_prefix);
/* here we are either on level 1 or the level that we are on
@@ -1406,28 +2005,30 @@ found_level:
dict_stats_analyze_index_for_n_prefix(
index, level, total_recs, n_prefix,
- n_diff_on_level[n_prefix],
- &n_diff_boundaries[n_prefix]);
+ n_diff_on_level[n_prefix - 1],
+ &n_diff_boundaries[n_prefix - 1], &mtr);
}
- for (i = 1; i <= n_uniq; i++) {
+ mtr_commit(&mtr);
+
+ for (ulint i = 0; i < n_uniq; i++) {
dyn_array_free(&n_diff_boundaries[i]);
}
mem_free(n_diff_boundaries);
mem_free(n_diff_on_level);
+
+ dict_stats_assert_initialized_index(index);
}
-/* @} */
/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively slow and is used to calculate persistent statistics that
will be saved on disk.
-dict_stats_update_persistent() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_update_persistent(
/*=========================*/
dict_table_t* table) /*!< in/out: table */
@@ -1436,21 +2037,30 @@ dict_stats_update_persistent(
DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
- /* XXX quit if interrupted, e.g. SIGTERM */
+ dict_table_stats_lock(table, RW_X_LATCH);
/* analyze the clustered index first */
index = dict_table_get_first_index(table);
- if (index == NULL) {
+ if (index == NULL
+ || dict_index_is_corrupted(index)
+ || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
+
/* Table definition is corrupt */
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ dict_stats_empty_table(table);
+
return(DB_CORRUPTION);
}
+ ut_ad(!dict_index_is_univ(index));
+
dict_stats_analyze_index(index);
- table->stat_n_rows
- = index->stat_n_diff_key_vals[dict_index_get_n_unique(index)];
+ ulint n_unique = dict_index_get_n_unique(index);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
table->stat_clustered_index_size = index->stat_index_size;
@@ -1462,31 +2072,47 @@ dict_stats_update_persistent(
index != NULL;
index = dict_table_get_next_index(index)) {
+ ut_ad(!dict_index_is_univ(index));
+
if (index->type & DICT_FTS) {
continue;
}
- dict_stats_analyze_index(index);
+ dict_stats_empty_index(index);
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
+ dict_stats_analyze_index(index);
+ }
table->stat_sum_of_other_index_sizes
+= index->stat_index_size;
}
+ table->stats_last_recalc = ut_time();
+
table->stat_modified_counter = 0;
table->stat_initialized = TRUE;
+ dict_stats_assert_initialized(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
return(DB_SUCCESS);
}
-/* @} */
+#include "mysql_com.h"
/*********************************************************************//**
Save an individual index's statistic into the persistent statistics
storage.
dict_stats_save_index_stat() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_save_index_stat(
/*=======================*/
dict_index_t* index, /*!< in: index */
@@ -1494,95 +2120,114 @@ dict_stats_save_index_stat(
const char* stat_name, /*!< in: name of the stat */
ib_uint64_t stat_value, /*!< in: value of the stat */
ib_uint64_t* sample_size, /*!< in: n pages sampled or NULL */
- const char* stat_description,/*!< in: description of the stat */
- trx_t* trx, /*!< in/out: transaction to use */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ const char* stat_description)/*!< in: description of the stat */
{
pars_info_t* pinfo;
- enum db_err ret;
+ dberr_t ret;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
- pinfo = pars_info_create();
-
- pars_info_add_literal(pinfo, "database_name", index->table->name,
- dict_get_db_name_len(index->table->name),
- DATA_VARCHAR, 0);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(index->table->name));
+ dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+ pinfo = pars_info_create();
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
pars_info_add_str_literal(pinfo, "index_name", index->name);
-
+ UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
pars_info_add_int4_literal(pinfo, "last_update", last_update);
-
+ UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
pars_info_add_str_literal(pinfo, "stat_name", stat_name);
-
+ UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
-
if (sample_size != NULL) {
+ UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
} else {
pars_info_add_literal(pinfo, "sample_size", NULL,
UNIV_SQL_NULL, DATA_FIXBINARY, 0);
}
-
+ UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
pars_info_add_str_literal(pinfo, "stat_description",
stat_description);
- ret = que_eval_sql(pinfo,
- "PROCEDURE INDEX_STATS_SAVE () IS\n"
- "dummy CHAR;\n"
- "BEGIN\n"
-
- "SELECT database_name INTO dummy\n"
- "FROM \"" INDEX_STATS_NAME "\"\n"
- "WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name AND\n"
- "index_name = :index_name AND\n"
- "stat_name = :stat_name\n"
- "FOR UPDATE;\n"
-
- "IF (SQL % NOTFOUND) THEN\n"
- " INSERT INTO \"" INDEX_STATS_NAME "\"\n"
- " VALUES\n"
- " (\n"
- " :database_name,\n"
- " :table_name,\n"
- " :index_name,\n"
- " :last_update,\n"
- " :stat_name,\n"
- " :stat_value,\n"
- " :sample_size,\n"
- " :stat_description\n"
- " );\n"
- "ELSE\n"
- " UPDATE \"" INDEX_STATS_NAME "\" SET\n"
- " last_update = :last_update,\n"
- " stat_value = :stat_value,\n"
- " sample_size = :sample_size,\n"
- " stat_description = :stat_description\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name AND\n"
- " index_name = :index_name AND\n"
- " stat_name = :stat_name;\n"
- "END IF;\n"
- "END;",
- !caller_has_dict_sys_mutex, trx);
-
- /* pinfo is freed by que_eval_sql() */
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE INDEX_STATS_SAVE_INSERT () IS\n"
+ "BEGIN\n"
+ "INSERT INTO \"" INDEX_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":index_name,\n"
+ ":last_update,\n"
+ ":stat_name,\n"
+ ":stat_value,\n"
+ ":sample_size,\n"
+ ":stat_description\n"
+ ");\n"
+ "END;");
+
+ if (ret == DB_DUPLICATE_KEY) {
+
+ pinfo = pars_info_create();
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+ UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
+ pars_info_add_int4_literal(pinfo, "last_update", last_update);
+ UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
+ pars_info_add_str_literal(pinfo, "stat_name", stat_name);
+ UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
+ pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
+ if (sample_size != NULL) {
+ UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
+ pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
+ } else {
+ pars_info_add_literal(pinfo, "sample_size", NULL,
+ UNIV_SQL_NULL, DATA_FIXBINARY, 0);
+ }
+ UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
+ pars_info_add_str_literal(pinfo, "stat_description",
+ stat_description);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE INDEX_STATS_SAVE_UPDATE () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ "last_update = :last_update,\n"
+ "stat_value = :stat_value,\n"
+ "sample_size = :sample_size,\n"
+ "stat_description = :stat_description\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name AND\n"
+ "stat_name = :stat_name;\n"
+ "END;");
+ }
if (ret != DB_SUCCESS) {
+ char buf_table[MAX_FULL_NAME_LEN];
+ char buf_index[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error while trying to save index "
- "statistics for table %s, index %s, "
- "stat name %s: %s\n",
- index->table->name, index->name,
+ " InnoDB: Cannot save index statistics for table "
+ "%s, index %s, stat name \"%s\": %s\n",
+ ut_format_name(index->table->name, TRUE,
+ buf_table, sizeof(buf_table)),
+ ut_format_name(index->name, FALSE,
+ buf_index, sizeof(buf_index)),
stat_name, ut_strerr(ret));
-
- trx->error_state = DB_SUCCESS;
}
return(ret);
@@ -1594,196 +2239,165 @@ Save the table's statistics into the persistent statistics storage.
dict_stats_save() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_save(
/*============*/
- dict_table_t* table, /*!< in: table */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ dict_table_t* table_orig) /*!< in: table */
{
- trx_t* trx;
pars_info_t* pinfo;
- dict_index_t* index;
lint now;
- enum db_err ret;
+ dberr_t ret;
+ dict_table_t* table;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ table = dict_stats_snapshot_create(table_orig);
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
/* MySQL's timestamp is 4 byte, so we use
pars_info_add_int4_literal() which takes a lint arg, so "now" is
lint */
now = (lint) ut_time();
- trx = trx_allocate_for_background();
-
- /* Use 'read-uncommitted' so that the SELECTs we execute
- do not get blocked in case some user has locked the rows we
- are SELECTing */
-
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
-
- trx_start_if_not_started(trx);
+#define PREPARE_PINFO_FOR_TABLE_SAVE(p, t, n) \
+ do { \
+ pars_info_add_str_literal((p), "database_name", db_utf8); \
+ pars_info_add_str_literal((p), "table_name", table_utf8); \
+ pars_info_add_int4_literal((p), "last_update", (n)); \
+ pars_info_add_ull_literal((p), "n_rows", (t)->stat_n_rows); \
+ pars_info_add_ull_literal((p), "clustered_index_size", \
+ (t)->stat_clustered_index_size); \
+ pars_info_add_ull_literal((p), "sum_of_other_index_sizes", \
+ (t)->stat_sum_of_other_index_sizes); \
+ } while(false);
pinfo = pars_info_create();
- pars_info_add_literal(pinfo, "database_name", table->name,
- dict_get_db_name_len(table->name),
- DATA_VARCHAR, 0);
-
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(table->name));
-
- pars_info_add_int4_literal(pinfo, "last_update", now);
-
- pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
-
- pars_info_add_ull_literal(pinfo, "clustered_index_size",
- table->stat_clustered_index_size);
-
- pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
- table->stat_sum_of_other_index_sizes);
-
- ret = que_eval_sql(pinfo,
- "PROCEDURE TABLE_STATS_SAVE () IS\n"
- "dummy CHAR;\n"
- "BEGIN\n"
-
- "SELECT database_name INTO dummy\n"
- "FROM \"" TABLE_STATS_NAME "\"\n"
- "WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name\n"
- "FOR UPDATE;\n"
-
- "IF (SQL % NOTFOUND) THEN\n"
- " INSERT INTO \"" TABLE_STATS_NAME "\"\n"
- " VALUES\n"
- " (\n"
- " :database_name,\n"
- " :table_name,\n"
- " :last_update,\n"
- " :n_rows,\n"
- " :clustered_index_size,\n"
- " :sum_of_other_index_sizes\n"
- " );\n"
- "ELSE\n"
- " UPDATE \"" TABLE_STATS_NAME "\" SET\n"
- " last_update = :last_update,\n"
- " n_rows = :n_rows,\n"
- " clustered_index_size = :clustered_index_size,\n"
- " sum_of_other_index_sizes = "
- " :sum_of_other_index_sizes\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name;\n"
- "END IF;\n"
- "END;",
- !caller_has_dict_sys_mutex, trx);
-
- /* pinfo is freed by que_eval_sql() */
+ PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE TABLE_STATS_SAVE_INSERT () IS\n"
+ "BEGIN\n"
+ "INSERT INTO \"" TABLE_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":last_update,\n"
+ ":n_rows,\n"
+ ":clustered_index_size,\n"
+ ":sum_of_other_index_sizes\n"
+ ");\n"
+ "END;");
+
+ if (ret == DB_DUPLICATE_KEY) {
+ pinfo = pars_info_create();
+
+ PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE TABLE_STATS_SAVE_UPDATE () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ "last_update = :last_update,\n"
+ "n_rows = :n_rows,\n"
+ "clustered_index_size = :clustered_index_size,\n"
+ "sum_of_other_index_sizes = "
+ " :sum_of_other_index_sizes\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;");
+ }
if (ret != DB_SUCCESS) {
-
+ char buf[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error while trying to save table "
- "statistics for table %s: %s\n",
- table->name, ut_strerr(ret));
-
- goto end_rollback;
+ " InnoDB: Cannot save table statistics for table "
+ "%s: %s\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)),
+ ut_strerr(ret));
+ goto end;
}
+ dict_index_t* index;
+
for (index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
- ib_uint64_t stat_n_diff_key_vals[REC_MAX_N_FIELDS];
- ib_uint64_t stat_n_sample_sizes[REC_MAX_N_FIELDS];
- ulint n_uniq;
- ulint i;
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
ret = dict_stats_save_index_stat(index, now, "size",
index->stat_index_size,
NULL,
"Number of pages "
- "in the index",
- trx,
- caller_has_dict_sys_mutex);
+ "in the index");
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
index->stat_n_leaf_pages,
NULL,
"Number of leaf pages "
- "in the index",
- trx,
- caller_has_dict_sys_mutex);
+ "in the index");
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
- n_uniq = dict_index_get_n_unique(index);
-
- ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_diff_key_vals));
-
- memcpy(stat_n_diff_key_vals, index->stat_n_diff_key_vals,
- (n_uniq + 1) * sizeof(index->stat_n_diff_key_vals[0]));
-
- ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_sample_sizes));
-
- memcpy(stat_n_sample_sizes, index->stat_n_sample_sizes,
- (n_uniq + 1) * sizeof(index->stat_n_sample_sizes[0]));
-
- for (i = 1; i <= n_uniq; i++) {
+ for (ulint i = 0; i < index->n_uniq; i++) {
char stat_name[16];
char stat_description[1024];
ulint j;
ut_snprintf(stat_name, sizeof(stat_name),
- "n_diff_pfx%02lu", i);
+ "n_diff_pfx%02lu", i + 1);
/* craft a string that contains the columns names */
ut_snprintf(stat_description,
sizeof(stat_description),
"%s", index->fields[0].name);
- for (j = 2; j <= i; j++) {
+ for (j = 1; j <= i; j++) {
size_t len;
len = strlen(stat_description);
ut_snprintf(stat_description + len,
sizeof(stat_description) - len,
- ",%s", index->fields[j - 1].name);
+ ",%s", index->fields[j].name);
}
ret = dict_stats_save_index_stat(
index, now, stat_name,
- stat_n_diff_key_vals[i],
- &stat_n_sample_sizes[i],
- stat_description, trx,
- caller_has_dict_sys_mutex);
+ index->stat_n_diff_key_vals[i],
+ &index->stat_n_sample_sizes[i],
+ stat_description);
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
}
}
- trx_commit_for_mysql(trx);
- ret = DB_SUCCESS;
- goto end_free;
-
-end_rollback:
-
- trx->op_info = "rollback of internal transaction on stats tables";
- trx_rollback_to_savepoint(trx, NULL);
- trx->op_info = "";
- ut_a(trx->error_state == DB_SUCCESS);
+end:
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
-end_free:
-
- trx_free_for_background(trx);
+ dict_stats_snapshot_free(table);
return(ret);
}
@@ -1875,11 +2489,11 @@ dict_stats_fetch_table_stats_step(
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
-typedef struct index_fetch_struct {
+struct index_fetch_t {
dict_table_t* table; /*!< table whose indexes are to be modified */
- ibool stats_were_modified; /*!< will be set to TRUE if at
+ bool stats_were_modified; /*!< will be set to true if at
least one index stats were modified */
-} index_fetch_t;
+};
/*********************************************************************//**
Called for the rows that are selected by
@@ -2036,12 +2650,12 @@ dict_stats_fetch_index_stats_step(
if (stat_name_len == 4 /* strlen("size") */
&& strncasecmp("size", stat_name, stat_name_len) == 0) {
index->stat_index_size = (ulint) stat_value;
- arg->stats_were_modified = TRUE;
+ arg->stats_were_modified = true;
} else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
&& strncasecmp("n_leaf_pages", stat_name, stat_name_len)
== 0) {
index->stat_n_leaf_pages = (ulint) stat_value;
- arg->stats_were_modified = TRUE;
+ arg->stats_were_modified = true;
} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
&& strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
@@ -2057,19 +2671,24 @@ dict_stats_fetch_index_stats_step(
|| num_ptr[0] < '0' || num_ptr[0] > '9'
|| num_ptr[1] < '0' || num_ptr[1] > '9') {
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Ignoring strange row from "
"%s WHERE "
- "database_name = '%.*s' AND "
+ "database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s' AND "
"stat_name = '%.*s'; because stat_name "
"is malformed\n",
INDEX_STATS_NAME_PRINT,
- (int) dict_get_db_name_len(table->name),
- table->name,
- dict_remove_db_name(table->name),
+ db_utf8,
+ table_utf8,
index->name,
(int) stat_name_len,
stat_name);
@@ -2081,41 +2700,50 @@ dict_stats_fetch_index_stats_step(
note that stat_name does not have a terminating '\0' */
n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
- if (n_pfx == 0 || n_pfx > dict_index_get_n_unique(index)) {
+ ulint n_uniq = index->n_uniq;
+
+ if (n_pfx == 0 || n_pfx > n_uniq) {
+
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Ignoring strange row from "
"%s WHERE "
- "database_name = '%.*s' AND "
+ "database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s' AND "
"stat_name = '%.*s'; because stat_name is "
"out of range, the index has %lu unique "
"columns\n",
INDEX_STATS_NAME_PRINT,
- (int) dict_get_db_name_len(table->name),
- table->name,
- dict_remove_db_name(table->name),
+ db_utf8,
+ table_utf8,
index->name,
(int) stat_name_len,
stat_name,
- dict_index_get_n_unique(index));
+ n_uniq);
return(TRUE);
}
/* else */
- index->stat_n_diff_key_vals[n_pfx] = stat_value;
+ index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
if (sample_size != UINT64_UNDEFINED) {
- index->stat_n_sample_sizes[n_pfx] = sample_size;
+ index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
} else {
/* hmm, strange... the user must have UPDATEd the
table manually and SET sample_size = NULL */
- index->stat_n_sample_sizes[n_pfx] = 0;
+ index->stat_n_sample_sizes[n_pfx - 1] = 0;
}
- arg->stats_were_modified = TRUE;
+ index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
+
+ arg->stats_were_modified = true;
} else {
/* silently ignore rows with unknown stat_name, the
user may have developed her own stats */
@@ -2131,19 +2759,25 @@ Read table's statistics from the persistent statistics storage.
dict_stats_fetch_from_ps() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_fetch_from_ps(
/*=====================*/
- dict_table_t* table, /*!< in/out: table */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ dict_table_t* table) /*!< in/out: table */
{
index_fetch_t index_fetch_arg;
trx_t* trx;
pars_info_t* pinfo;
- enum db_err ret;
+ dberr_t ret;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
- ut_ad(mutex_own(&dict_sys->mutex) == caller_has_dict_sys_mutex);
+ /* Initialize all stats to dummy values before fetching because if
+ the persistent storage contains incomplete stats (e.g. missing stats
+ for some index) then we would end up with (partially) uninitialized
+ stats. */
+ dict_stats_empty_table(table);
trx = trx_allocate_for_background();
@@ -2155,14 +2789,14 @@ dict_stats_fetch_from_ps(
trx_start_if_not_started(trx);
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
pinfo = pars_info_create();
- pars_info_add_literal(pinfo, "database_name", table->name,
- dict_get_db_name_len(table->name),
- DATA_VARCHAR, 0);
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(table->name));
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
pars_info_bind_function(pinfo,
"fetch_table_stats_step",
@@ -2170,7 +2804,7 @@ dict_stats_fetch_from_ps(
table);
index_fetch_arg.table = table;
- index_fetch_arg.stats_were_modified = FALSE;
+ index_fetch_arg.stats_were_modified = false;
pars_info_bind_function(pinfo,
"fetch_index_stats_step",
dict_stats_fetch_index_stats_step,
@@ -2230,19 +2864,9 @@ dict_stats_fetch_from_ps(
"CLOSE index_stats_cur;\n"
"END;",
- !caller_has_dict_sys_mutex, trx);
-
+ TRUE, trx);
/* pinfo is freed by que_eval_sql() */
- /* XXX If mysql.innodb_index_stats contained less rows than the number
- of indexes in the table, then some of the indexes of the table
- were left uninitialized. Currently this is ignored and those
- indexes are left with uninitialized stats until ANALYZE TABLE is
- run. This condition happens when the user creates a new index
- on a table. We could return DB_STATS_DO_NOT_EXIST from here,
- forcing the usage of transient stats until mysql.innodb_index_stats
- is complete. */
-
trx_commit_for_mysql(trx);
trx_free_for_background(trx);
@@ -2256,32 +2880,67 @@ dict_stats_fetch_from_ps(
/* @} */
/*********************************************************************//**
+Fetches or calculates new estimates for index statistics.
+dict_stats_update_for_index() @{ */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ if (dict_stats_is_persistent_enabled(index->table)) {
+
+ if (dict_stats_persistent_storage_check(false)) {
+ dict_table_stats_lock(index->table, RW_X_LATCH);
+ dict_stats_analyze_index(index);
+ dict_table_stats_unlock(index->table, RW_X_LATCH);
+ dict_stats_save(index->table);
+ return;
+ }
+ /* else */
+
+ /* Fall back to transient stats since the persistent
+ storage is not present or is corrupted */
+ char buf_table[MAX_FULL_NAME_LEN];
+ char buf_index[MAX_FULL_NAME_LEN];
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent statistics "
+ "requested for table %s index %s but the required "
+ "persistent statistics storage is not present or is "
+ "corrupted. Using transient stats instead.\n",
+ ut_format_name(index->table->name, TRUE,
+ buf_table, sizeof(buf_table)),
+ ut_format_name(index->name, FALSE,
+ buf_index, sizeof(buf_index)));
+ }
+
+ dict_table_stats_lock(index->table, RW_X_LATCH);
+ dict_stats_update_transient_for_index(index);
+ dict_table_stats_unlock(index->table, RW_X_LATCH);
+}
+/* @} */
+
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
-dict_stats_update() @{
-@return DB_* error code or DB_SUCCESS */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
+dberr_t
dict_stats_update(
/*==============*/
dict_table_t* table, /*!< in/out: table */
- dict_stats_upd_option_t stats_upd_option,
+ dict_stats_upd_option_t stats_upd_option)
/*!< in: whether to (re) calc
the stats or to fetch them from
the persistent statistics
storage */
- ibool caller_has_dict_sys_mutex)
- /*!< in: TRUE if the caller
- owns dict_sys->mutex */
{
- enum db_err ret = DB_ERROR;
+ char buf[MAX_FULL_NAME_LEN];
- /* check whether caller_has_dict_sys_mutex is set correctly;
- note that mutex_own() is not implemented in non-debug code so
- we cannot avoid having this extra param to the current function */
- ut_ad(caller_has_dict_sys_mutex
- ? mutex_own(&dict_sys->mutex)
- : !mutex_own(&dict_sys->mutex));
+ ut_ad(!mutex_own(&dict_sys->mutex));
if (table->ibd_file_missing) {
ut_print_timestamp(stderr);
@@ -2289,83 +2948,61 @@ dict_stats_update(
" InnoDB: cannot calculate statistics for table %s "
"because the .ibd file is missing. For help, please "
"refer to " REFMAN "innodb-troubleshooting.html\n",
- table->name);
-
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+ dict_stats_empty_table(table);
return(DB_TABLESPACE_DELETED);
- }
-
- /* If we have set a high innodb_force_recovery level, do not calculate
- statistics, as a badly corrupted index can cause a crash in it. */
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
+ } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ /* If we have set a high innodb_force_recovery level, do
+ not calculate statistics, as a badly corrupted index can
+ cause a crash in it. */
+ dict_stats_empty_table(table);
return(DB_SUCCESS);
}
switch (stats_upd_option) {
case DICT_STATS_RECALC_PERSISTENT:
- case DICT_STATS_RECALC_PERSISTENT_SILENT:
+
+ ut_ad(!srv_read_only_mode);
+
/* Persistent recalculation requested, called from
- ANALYZE TABLE or from TRUNCATE TABLE */
-
- /* FTS auxiliary tables do not need persistent stats */
- if ((ut_strcount(table->name, "FTS") > 0
- && (ut_strcount(table->name, "CONFIG") > 0
- || ut_strcount(table->name, "INDEX") > 0
- || ut_strcount(table->name, "DELETED") > 0
- || ut_strcount(table->name, "DOC_ID") > 0
- || ut_strcount(table->name, "ADDED") > 0))) {
- goto transient;
- }
+ 1) ANALYZE TABLE, or
+ 2) the auto recalculation background thread, or
+ 3) open table if stats do not exist on disk and auto recalc
+ is enabled */
+
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name, '/') != NULL);
/* check if the persistent statistics storage exists
before calling the potentially slow function
dict_stats_update_persistent(); that is a
prerequisite for dict_stats_save() succeeding */
- if (dict_stats_persistent_storage_check(
- caller_has_dict_sys_mutex)) {
-
- dict_table_stats_lock(table, RW_X_LATCH);
+ if (dict_stats_persistent_storage_check(false)) {
- ret = dict_stats_update_persistent(table);
+ dberr_t err;
- /* XXX Currently dict_stats_save() would read the
- stats from the table without dict_table_stats_lock()
- which means it could save inconsistent data on the
- disk. This is because we must call
- dict_table_stats_lock() after locking dict_sys->mutex.
- A solution is to copy here the stats to a temporary
- buffer while holding the _stats_lock(), release it,
- and pass that buffer to dict_stats_save(). */
+ err = dict_stats_update_persistent(table);
- dict_table_stats_unlock(table, RW_X_LATCH);
-
- if (ret == DB_SUCCESS) {
- ret = dict_stats_save(
- table,
- caller_has_dict_sys_mutex);
+ if (err != DB_SUCCESS) {
+ return(err);
}
- return(ret);
+ err = dict_stats_save(table);
+
+ return(err);
}
- /* else */
/* Fall back to transient stats since the persistent
storage is not present or is corrupted */
- if (stats_upd_option == DICT_STATS_RECALC_PERSISTENT) {
-
- ut_print_timestamp(stderr);
- /* XXX add link to the doc about storage
- creation */
- fprintf(stderr,
- " InnoDB: Recalculation of persistent "
- "statistics requested but the required "
- "persistent statistics storage is not "
- "present or is corrupted. "
- "Using quick transient stats "
- "instead.\n");
- }
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent statistics "
+ "requested for table %s but the required persistent "
+ "statistics storage is not present or is corrupted. "
+ "Using transient stats instead.\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
goto transient;
@@ -2373,265 +3010,317 @@ dict_stats_update(
goto transient;
- case DICT_STATS_FETCH:
- case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
- /* fetch requested, either fetch from persistent statistics
- storage or use the old method */
+ case DICT_STATS_EMPTY_TABLE:
- dict_table_stats_lock(table, RW_X_LATCH);
+ dict_stats_empty_table(table);
- if (stats_upd_option == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY
- && table->stat_initialized) {
+ /* If table is using persistent stats,
+ then save the stats on disk */
- dict_table_stats_unlock(table, RW_X_LATCH);
- return(DB_SUCCESS);
+ if (dict_stats_is_persistent_enabled(table)) {
+
+ if (dict_stats_persistent_storage_check(false)) {
+
+ return(dict_stats_save(table));
+ }
+
+ return(DB_STATS_DO_NOT_EXIST);
}
- /* else */
- /* Must unlock because otherwise there is a lock order
- violation with dict_sys->mutex below. Declare stats to be
- initialized before unlocking. */
- table->stat_initialized = TRUE;
- dict_table_stats_unlock(table, RW_X_LATCH);
+ return(DB_SUCCESS);
+
+ case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
- if (strchr(table->name, '/') == NULL
- || strcmp(table->name, INDEX_STATS_NAME) == 0
- || strcmp(table->name, TABLE_STATS_NAME) == 0
- || (ut_strcount(table->name, "FTS") > 0
- && (ut_strcount(table->name, "CONFIG") > 0
- || ut_strcount(table->name, "INDEX") > 0
- || ut_strcount(table->name, "DELETED") > 0
- || ut_strcount(table->name, "DOC_ID") > 0
- || ut_strcount(table->name, "ADDED") > 0))) {
- /* Use the quick transient stats method for
- InnoDB internal tables, because we know the
- persistent stats storage does not contain data
- for them */
+ /* fetch requested, either fetch from persistent statistics
+ storage or use the old method */
- goto transient;
+ if (table->stat_initialized) {
+ return(DB_SUCCESS);
}
- /* else */
- if (dict_stats_persistent_storage_check(
- caller_has_dict_sys_mutex)) {
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name, '/') != NULL);
- ret = dict_stats_fetch_from_ps(table,
- caller_has_dict_sys_mutex);
+ if (!dict_stats_persistent_storage_check(false)) {
+ /* persistent statistics storage does not exist
+ or is corrupted, calculate the transient stats */
- if (ret == DB_STATS_DO_NOT_EXIST
- || (ret != DB_SUCCESS && stats_upd_option
- == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY)) {
- /* Stats for this particular table do not
- exist or we have been called from open table
- which needs to initialize the stats,
- calculate the quick transient statistics */
- goto transient;
- }
- /* else */
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: Fetch of persistent "
+ "statistics requested for table %s but the "
+ "required system tables %s and %s are not "
+ "present or have unexpected structure. "
+ "Using transient stats instead.\n",
+ ut_format_name(table->name, TRUE,
+ buf, sizeof(buf)),
+ TABLE_STATS_NAME_PRINT,
+ INDEX_STATS_NAME_PRINT);
- return(ret);
- } else {
- /* persistent statistics storage does not exist,
- calculate the transient stats */
goto transient;
}
- break;
+ dict_table_t* t;
- /* no "default:" in order to produce a compilation warning
- about unhandled enumeration value */
- }
+ ut_ad(!srv_read_only_mode);
-transient:
+ /* Create a dummy table object with the same name and
+ indexes, suitable for fetching the stats into it. */
+ t = dict_stats_table_clone_create(table);
- dict_table_stats_lock(table, RW_X_LATCH);
+ dberr_t err = dict_stats_fetch_from_ps(t);
- dict_stats_update_transient(table);
+ t->stats_last_recalc = table->stats_last_recalc;
+ t->stat_modified_counter = 0;
- dict_table_stats_unlock(table, RW_X_LATCH);
+ switch (err) {
+ case DB_SUCCESS:
- return(DB_SUCCESS);
-}
-/* @} */
+ dict_table_stats_lock(table, RW_X_LATCH);
-/*********************************************************************//**
-Close the stats tables. Should always be called after successful
-dict_stats_open(). It will free the dict_stats handle.
-dict_stats_close() @{ */
-UNIV_INLINE
-void
-dict_stats_close(
-/*=============*/
- dict_stats_t* dict_stats) /*!< in/own: Handle to open
- statistics tables */
-{
- if (dict_stats->table_stats != NULL) {
- dict_table_close(dict_stats->table_stats, FALSE);
- dict_stats->table_stats = NULL;
- }
+ /* Initialize all stats to dummy values before
+ copying because dict_stats_table_clone_create() does
+ skip corrupted indexes so our dummy object 't' may
+ have less indexes than the real object 'table'. */
+ dict_stats_empty_table(table);
- if (dict_stats->index_stats != NULL) {
- dict_table_close(dict_stats->index_stats, FALSE);
- dict_stats->index_stats = NULL;
- }
+ dict_stats_copy(table, t);
- mem_free(dict_stats);
-}
-/* @} */
+ dict_stats_assert_initialized(table);
-/*********************************************************************//**
-Open stats tables to prevent these tables from being DROPped.
-Also check whether they have the correct structure. The caller
-must call dict_stats_close() when he has finished DMLing the tables.
-dict_stats_open() @{
-@return pointer to open tables or NULL on failure */
-UNIV_INLINE
-dict_stats_t*
-dict_stats_open(void)
-/*=================*/
-{
- dict_stats_t* dict_stats;
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ dict_stats_table_clone_free(t);
+
+ return(DB_SUCCESS);
+ case DB_STATS_DO_NOT_EXIST:
+
+ dict_stats_table_clone_free(t);
- dict_stats = static_cast<dict_stats_t*>(
- mem_zalloc(sizeof(*dict_stats)));
+ if (dict_stats_auto_recalc_is_enabled(table)) {
+ return(dict_stats_update(
+ table,
+ DICT_STATS_RECALC_PERSISTENT));
+ }
- dict_stats->table_stats = dict_table_open_on_name_no_stats(
- TABLE_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+ ut_format_name(table->name, TRUE, buf, sizeof(buf));
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Trying to use table %s which has "
+ "persistent statistics enabled, but auto "
+ "recalculation turned off and the statistics "
+ "do not exist in %s and %s. Please either run "
+ "\"ANALYZE TABLE %s;\" manually or enable the "
+ "auto recalculation with "
+ "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
+ "InnoDB will now use transient statistics for "
+ "%s.\n",
+ buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
+ buf, buf);
- dict_stats->index_stats = dict_table_open_on_name_no_stats(
- INDEX_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+ goto transient;
+ default:
- /* Check if the tables have the correct structure, if yes then
- after this function we can safely DELETE from them without worrying
- that they may get DROPped or DDLed because the open will have
- increased the reference count. */
+ dict_stats_table_clone_free(t);
- if (dict_stats->table_stats == NULL
- || dict_stats->index_stats == NULL
- || !dict_stats_persistent_storage_check(FALSE)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error fetching persistent statistics "
+ "for table %s from %s and %s: %s. "
+ "Using transient stats method instead.\n",
+ ut_format_name(table->name, TRUE, buf,
+ sizeof(buf)),
+ TABLE_STATS_NAME,
+ INDEX_STATS_NAME,
+ ut_strerr(err));
- /* There was an error, close the tables and free the handle. */
- dict_stats_close(dict_stats);
- dict_stats = NULL;
+ goto transient;
+ }
+ /* no "default:" in order to produce a compilation warning
+ about unhandled enumeration value */
}
- return(dict_stats);
+transient:
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ dict_stats_update_transient(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ return(DB_SUCCESS);
}
-/* @} */
/*********************************************************************//**
Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
-The transaction is not committed, it must not be committed in this
-function because this is the user trx that is running DROP INDEX.
-The transaction will be committed at the very end when dropping an
-index.
+This function creates its own trx and commits it.
A note from Marko why we cannot edit user and sys_* tables in one trx:
marko: The problem is that ibuf merges should be disabled while we are
rolling back dict transactions.
marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
But we shouldn't open *.ibd files before we have rolled back dict
transactions and opened the SYS_* records for the *.ibd files.
-dict_stats_delete_index_stats() @{
+dict_stats_drop_index() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_index_stats(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx, /*!< in: transaction to use */
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+ const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */
+ const char* iname, /*!< in: index name */
char* errstr, /*!< out: error message if != DB_SUCCESS
is returned */
ulint errstr_sz)/*!< in: size of the errstr buffer */
{
- char database_name[MAX_DATABASE_NAME_LEN + 1];
- const char* table_name;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
pars_info_t* pinfo;
- enum db_err ret;
- dict_stats_t* dict_stats;
- void* mysql_thd = trx->mysql_thd;
+ dberr_t ret;
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
/* skip indexes whose table names do not contain a database name
e.g. if we are dropping an index from SYS_TABLES */
- if (strchr(index->table_name, '/') == NULL) {
-
- return(DB_SUCCESS);
- }
+ if (strchr(db_and_table, '/') == NULL) {
- /* Increment table reference count to prevent the tables from
- being DROPped just before que_eval_sql(). */
- dict_stats = dict_stats_open();
-
- if (dict_stats == NULL) {
- /* stats tables do not exist or have unexpected structure */
return(DB_SUCCESS);
}
- /* the stats tables cannot be DROPped now */
-
- ut_snprintf(database_name, sizeof(database_name), "%.*s",
- (int) dict_get_db_name_len(index->table_name),
- index->table_name);
-
- table_name = dict_remove_db_name(index->table_name);
+ dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
pinfo = pars_info_create();
- pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
- pars_info_add_str_literal(pinfo, "table_name", table_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- pars_info_add_str_literal(pinfo, "index_name", index->name);
+ pars_info_add_str_literal(pinfo, "index_name", iname);
- /* Force lock wait timeout to be instantaneous because the incoming
- transaction was created via MySQL. */
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
- mysql_thd = trx->mysql_thd;
- trx->mysql_thd = NULL;
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DROP_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name;\n"
+ "END;\n");
- ret = que_eval_sql(pinfo,
- "PROCEDURE DROP_INDEX_STATS () IS\n"
- "BEGIN\n"
- "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name AND\n"
- "index_name = :index_name;\n"
- "END;\n",
- TRUE,
- trx);
-
- trx->mysql_thd = mysql_thd;
-
- /* pinfo is freed by que_eval_sql() */
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
- /* do not to commit here, see the function's comment */
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
if (ret != DB_SUCCESS) {
-
ut_snprintf(errstr, errstr_sz,
"Unable to delete statistics for index %s "
- "from %s%s. They can be deleted later using "
+ "from %s%s: %s. They can be deleted later using "
"DELETE FROM %s WHERE "
"database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s';",
- index->name,
+ iname,
INDEX_STATS_NAME_PRINT,
(ret == DB_LOCK_WAIT_TIMEOUT
? " because the rows are locked"
: ""),
+ ut_strerr(ret),
INDEX_STATS_NAME_PRINT,
- database_name,
- table_name,
- index->name);
+ db_utf8,
+ table_utf8,
+ iname);
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: %s\n", errstr);
-
- trx->error_state = DB_SUCCESS;
}
- dict_stats_close(dict_stats);
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+DELETE FROM mysql.innodb_table_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_delete_from_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_table_stats(
+/*===============================*/
+ const char* database_name, /*!< in: database name, e.g. 'db' */
+ const char* table_name) /*!< in: table name, e.g. 'table' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;\n");
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+DELETE FROM mysql.innodb_index_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_delete_from_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_index_stats(
+/*===============================*/
+ const char* database_name, /*!< in: database name, e.g. 'db' */
+ const char* table_name) /*!< in: table name, e.g. 'table' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;\n");
return(ret);
}
@@ -2640,130 +3329,332 @@ dict_stats_delete_index_stats(
/*********************************************************************//**
Removes the statistics for a table and all of its indexes from the
persistent statistics storage if it exists and if there is data stored for
-the table. This function creates its own transaction and commits it.
-dict_stats_delete_table_stats() @{
+the table. This function creates its own transaction and commits it.
+dict_stats_drop_table() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_table_stats(
-/*==========================*/
- const char* table_name, /*!< in: table name */
+dberr_t
+dict_stats_drop_table(
+/*==================*/
+ const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */
char* errstr, /*!< out: error message
if != DB_SUCCESS is returned */
ulint errstr_sz) /*!< in: size of errstr buffer */
{
- char database_name[MAX_DATABASE_NAME_LEN + 1];
- const char* table_name_strip; /* without leading db name */
- trx_t* trx;
- pars_info_t* pinfo;
- enum db_err ret = DB_ERROR;
- dict_stats_t* dict_stats;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
/* skip tables that do not contain a database name
e.g. if we are dropping SYS_TABLES */
- if (strchr(table_name, '/') == NULL) {
+ if (strchr(db_and_table, '/') == NULL) {
return(DB_SUCCESS);
}
/* skip innodb_table_stats and innodb_index_stats themselves */
- if (strcmp(table_name, TABLE_STATS_NAME) == 0
- || strcmp(table_name, INDEX_STATS_NAME) == 0) {
+ if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
+ || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
return(DB_SUCCESS);
}
- /* Create a new private trx */
+ dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
- trx = trx_allocate_for_background();
+ ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
- /* Use 'read-uncommitted' so that the SELECTs we execute
- do not get blocked in case some user has locked the rows we
- are SELECTing */
+ if (ret == DB_SUCCESS) {
+ ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
+ }
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
- trx_start_if_not_started(trx);
+ if (ret != DB_SUCCESS) {
- /* Increment table reference count to prevent the tables from
- being DROPped just before que_eval_sql(). */
- dict_stats = dict_stats_open();
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for table %s.%s: %s. "
+ "They can be deleted later using "
- if (dict_stats == NULL) {
- /* stats tables do not exist or have unexpected structure */
- ret = DB_SUCCESS;
- goto commit_and_return;
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s'; "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ db_utf8, table_utf8,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ db_utf8, table_utf8,
+
+ TABLE_STATS_NAME_PRINT,
+ db_utf8, table_utf8);
}
- ut_snprintf(database_name, sizeof(database_name), "%.*s",
- (int) dict_get_db_name_len(table_name),
- table_name);
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+UPDATE mysql.innodb_table_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_rename_in_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_in_table_stats(
+/*=============================*/
+ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
+ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
+ const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
+ const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
- table_name_strip = dict_remove_db_name(table_name);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
- pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
+ pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ "database_name = :new_dbname_utf8,\n"
+ "table_name = :new_tablename_utf8\n"
+ "WHERE\n"
+ "database_name = :old_dbname_utf8 AND\n"
+ "table_name = :old_tablename_utf8;\n"
+ "END;\n");
- pars_info_add_str_literal(pinfo, "table_name", table_name_strip);
+ return(ret);
+}
+/* @} */
- ret = que_eval_sql(pinfo,
- "PROCEDURE DROP_TABLE_STATS () IS\n"
- "BEGIN\n"
+/*********************************************************************//**
+Executes
+UPDATE mysql.innodb_index_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_rename_in_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_in_index_stats(
+/*=============================*/
+ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
+ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
+ const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
+ const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
- "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name;\n"
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
- "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name;\n"
+ pinfo = pars_info_create();
- "END;\n",
- TRUE,
- trx);
+ pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
+ pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ "database_name = :new_dbname_utf8,\n"
+ "table_name = :new_tablename_utf8\n"
+ "WHERE\n"
+ "database_name = :old_dbname_utf8 AND\n"
+ "table_name = :old_tablename_utf8;\n"
+ "END;\n");
- /* pinfo is freed by que_eval_sql() */
+ return(ret);
+}
+/* @} */
- if (ret != DB_SUCCESS) {
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+dict_stats_rename_table() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old name, e.g. 'db/table' */
+ const char* new_name, /*!< in: new name, e.g. 'db/table' */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz) /*!< in: errstr size */
+{
+ char old_db_utf8[MAX_DB_UTF8_LEN];
+ char new_db_utf8[MAX_DB_UTF8_LEN];
+ char old_table_utf8[MAX_TABLE_UTF8_LEN];
+ char new_table_utf8[MAX_TABLE_UTF8_LEN];
+ dberr_t ret;
- ut_snprintf(errstr, errstr_sz,
- "Unable to delete statistics for table %s.%s "
- "from %s or %s%s. "
- "They can be deleted later using "
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!mutex_own(&dict_sys->mutex));
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s'; "
+ /* skip innodb_table_stats and innodb_index_stats themselves */
+ if (strcmp(old_name, TABLE_STATS_NAME) == 0
+ || strcmp(old_name, INDEX_STATS_NAME) == 0
+ || strcmp(new_name, TABLE_STATS_NAME) == 0
+ || strcmp(new_name, INDEX_STATS_NAME) == 0) {
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
+ return(DB_SUCCESS);
+ }
- database_name, table_name_strip,
- TABLE_STATS_NAME_PRINT, INDEX_STATS_NAME_PRINT,
+ dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
+ old_table_utf8, sizeof(old_table_utf8));
- (ret == DB_LOCK_WAIT_TIMEOUT
- ? " because the rows are locked"
- : ""),
+ dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
+ new_table_utf8, sizeof(new_table_utf8));
- INDEX_STATS_NAME_PRINT,
- database_name, table_name_strip,
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+
+ ulint n_attempts = 0;
+ do {
+ n_attempts++;
+
+ ret = dict_stats_rename_in_table_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
+
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_table_stats(
+ new_db_utf8, new_table_utf8);
+ }
+
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ if (ret != DB_SUCCESS) {
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ os_thread_sleep(200000 /* 0.2 sec */);
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ if (ret != DB_SUCCESS) {
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from "
+ "%s.%s to %s.%s in %s: %s. "
+ "They can be renamed later using "
+
+ "UPDATE %s SET "
+ "database_name = '%s', "
+ "table_name = '%s' "
+ "WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
TABLE_STATS_NAME_PRINT,
- database_name, table_name_strip);
+ ut_strerr(ret),
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
+ TABLE_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ return(ret);
}
+ /* else */
- dict_stats_close(dict_stats);
+ n_attempts = 0;
+ do {
+ n_attempts++;
-commit_and_return:
+ ret = dict_stats_rename_in_index_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
- trx_commit_for_mysql(trx);
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_index_stats(
+ new_db_utf8, new_table_utf8);
+ }
- trx_free_for_background(trx);
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ if (ret != DB_SUCCESS) {
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ os_thread_sleep(200000 /* 0.2 sec */);
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ if (ret != DB_SUCCESS) {
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from "
+ "%s.%s to %s.%s in %s: %s. "
+ "They can be renamed later using "
+
+ "UPDATE %s SET "
+ "database_name = '%s', "
+ "table_name = '%s' "
+ "WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ INDEX_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ }
return(ret);
}
@@ -2933,13 +3824,13 @@ test_dict_stats_save()
dict_table_t table;
dict_index_t index1;
dict_field_t index1_fields[1];
- ib_uint64_t index1_stat_n_diff_key_vals[2];
- ib_uint64_t index1_stat_n_sample_sizes[2];
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
dict_index_t index2;
dict_field_t index2_fields[4];
- ib_uint64_t index2_stat_n_diff_key_vals[5];
- ib_uint64_t index2_stat_n_sample_sizes[5];
- enum db_err ret;
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
/* craft a dummy dict_table_t */
table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
@@ -2949,16 +3840,11 @@ test_dict_stats_save()
UT_LIST_INIT(table.indexes);
UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
-#ifdef UNIV_DEBUG
- table.magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
index1.name = TEST_IDX1_NAME;
index1.table = &table;
-#ifdef UNIV_DEBUG
- index1.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
- index1.to_be_dropped = 0;
index1.cached = 1;
index1.n_uniq = 1;
index1.fields = index1_fields;
@@ -2967,17 +3853,12 @@ test_dict_stats_save()
index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
index1_fields[0].name = TEST_IDX1_COL1_NAME;
- index1_stat_n_diff_key_vals[0] = 1; /* dummy */
- index1_stat_n_diff_key_vals[1] = TEST_IDX1_N_DIFF1;
- index1_stat_n_sample_sizes[0] = 0; /* dummy */
- index1_stat_n_sample_sizes[1] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+ index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
+ index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
index2.name = TEST_IDX2_NAME;
index2.table = &table;
-#ifdef UNIV_DEBUG
- index2.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
- index2.to_be_dropped = 0;
index2.cached = 1;
index2.n_uniq = 4;
index2.fields = index2_fields;
@@ -2989,18 +3870,16 @@ test_dict_stats_save()
index2_fields[1].name = TEST_IDX2_COL2_NAME;
index2_fields[2].name = TEST_IDX2_COL3_NAME;
index2_fields[3].name = TEST_IDX2_COL4_NAME;
- index2_stat_n_diff_key_vals[0] = 1; /* dummy */
- index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF1;
- index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF2;
- index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF3;
- index2_stat_n_diff_key_vals[4] = TEST_IDX2_N_DIFF4;
- index2_stat_n_sample_sizes[0] = 0; /* dummy */
- index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[4] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
-
- ret = dict_stats_save(&table, FALSE);
+ index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
+ index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
+ index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
+ index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
+ index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
+
+ ret = dict_stats_save(&table);
ut_a(ret == DB_SUCCESS);
@@ -3098,41 +3977,35 @@ test_dict_stats_fetch_from_ps()
{
dict_table_t table;
dict_index_t index1;
- ib_uint64_t index1_stat_n_diff_key_vals[2];
- ib_uint64_t index1_stat_n_sample_sizes[2];
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
dict_index_t index2;
- ib_uint64_t index2_stat_n_diff_key_vals[5];
- ib_uint64_t index2_stat_n_sample_sizes[5];
- enum db_err ret;
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
/* craft a dummy dict_table_t */
table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
UT_LIST_INIT(table.indexes);
UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
-#ifdef UNIV_DEBUG
- table.magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
index1.name = TEST_IDX1_NAME;
-#ifdef UNIV_DEBUG
- index1.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
index1.cached = 1;
index1.n_uniq = 1;
index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
index2.name = TEST_IDX2_NAME;
-#ifdef UNIV_DEBUG
- index2.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
index2.cached = 1;
index2.n_uniq = 4;
index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
- ret = dict_stats_fetch_from_ps(&table, FALSE);
+ ret = dict_stats_fetch_from_ps(&table);
ut_a(ret == DB_SUCCESS);
@@ -3143,19 +4016,19 @@ test_dict_stats_fetch_from_ps()
ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
- ut_a(index1_stat_n_diff_key_vals[1] == TEST_IDX1_N_DIFF1);
- ut_a(index1_stat_n_sample_sizes[1] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
+ ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
- ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF1);
- ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF2);
- ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF3);
- ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[4] == TEST_IDX2_N_DIFF4);
- ut_a(index2_stat_n_sample_sizes[4] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
+ ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
+ ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
+ ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
+ ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
printf("OK: fetch successful\n");
}
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
new file mode 100644
index 00000000000..7a30b748e7f
--- /dev/null
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -0,0 +1,392 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats_bg.cc
+Code used for background table and index stats gathering.
+
+Created Apr 25, 2012 Vasil Dimov
+*******************************************************/
+
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+
+#include <vector>
+
+/** Minimum time interval between stats recalc for a given table */
+#define MIN_RECALC_INTERVAL 10 /* seconds */
+
+#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE)
+
+/** Event to wake up the stats thread */
+UNIV_INTERN os_event_t dict_stats_event = NULL;
+
+/** This mutex protects the "recalc_pool" variable. */
+static ib_mutex_t recalc_pool_mutex;
+#ifdef HAVE_PSI_INTERFACE
+static mysql_pfs_key_t recalc_pool_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+
+/** The number of tables that can be added to "recalc_pool" before
+it is enlarged */
+static const ulint RECALC_POOL_INITIAL_SLOTS = 128;
+
+/** The multitude of tables whose stats are to be automatically
+recalculated - an STL vector */
+typedef std::vector<table_id_t> recalc_pool_t;
+static recalc_pool_t recalc_pool;
+
+typedef recalc_pool_t::iterator recalc_pool_iterator_t;
+
+/*****************************************************************//**
+Initialize the recalc pool, called once during thread initialization. */
+static
+void
+dict_stats_recalc_pool_init()
+/*=========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
+}
+
+/*****************************************************************//**
+Free the resources occupied by the recalc pool, called once during
+thread de-initialization. */
+static
+void
+dict_stats_recalc_pool_deinit()
+/*===========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ recalc_pool.clear();
+}
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped.
+dict_stats_recalc_pool_add() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table to add */
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&recalc_pool_mutex);
+
+ /* quit if already in the list */
+ for (recalc_pool_iterator_t iter = recalc_pool.begin();
+ iter != recalc_pool.end();
+ ++iter) {
+
+ if (*iter == table->id) {
+ mutex_exit(&recalc_pool_mutex);
+ return;
+ }
+ }
+
+ recalc_pool.push_back(table->id);
+
+ mutex_exit(&recalc_pool_mutex);
+
+ os_event_set(dict_stats_event);
+}
+/* @} */
+
+/*****************************************************************//**
+Get a table from the auto recalc pool. The returned table id is removed
+from the pool.
+dict_stats_recalc_pool_get() @{
+@return true if the pool was non-empty and "id" was set, false otherwise */
+static
+bool
+dict_stats_recalc_pool_get(
+/*=======================*/
+ table_id_t* id) /*!< out: table id, or unmodified if list is
+ empty */
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&recalc_pool_mutex);
+
+ if (recalc_pool.empty()) {
+ mutex_exit(&recalc_pool_mutex);
+ return(false);
+ }
+
+ *id = recalc_pool[0];
+
+ recalc_pool.erase(recalc_pool.begin());
+
+ mutex_exit(&recalc_pool_mutex);
+
+ return(true);
+}
+/* @} */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table to remove */
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ mutex_enter(&recalc_pool_mutex);
+
+ ut_ad(table->id > 0);
+
+ for (recalc_pool_iterator_t iter = recalc_pool.begin();
+ iter != recalc_pool.end();
+ ++iter) {
+
+ if (*iter == table->id) {
+ /* erase() invalidates the iterator */
+ recalc_pool.erase(iter);
+ break;
+ }
+ }
+
+ mutex_exit(&recalc_pool_mutex);
+}
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table(s).
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+tables after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex.
+dict_stats_wait_bg_to_stop_using_tables() @{ */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_tables(
+/*====================================*/
+ dict_table_t* table1, /*!< in/out: table1 */
+ dict_table_t* table2, /*!< in/out: table2, could be NULL */
+ trx_t* trx) /*!< in/out: transaction to use for
+ unlocking/locking the data dict */
+{
+ ut_ad(!srv_read_only_mode);
+
+ while ((table1->stats_bg_flag & BG_STAT_IN_PROGRESS)
+ || (table2 != NULL
+ && (table2->stats_bg_flag & BG_STAT_IN_PROGRESS))) {
+
+ table1->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+ if (table2 != NULL) {
+ table2->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ os_thread_sleep(250000);
+ row_mysql_lock_data_dictionary(trx);
+ }
+}
+/* @} */
+
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread()
+Must be called before dict_stats_thread() is started.
+dict_stats_thread_init() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_init()
+/*====================*/
+{
+ ut_a(!srv_read_only_mode);
+
+ dict_stats_event = os_event_create();
+
+ /* The recalc_pool_mutex is acquired from:
+ 1) the background stats gathering thread before any other latch
+ and released without latching anything else in between (thus
+ any level would do here)
+ 2) from row_update_statistics_if_needed()
+ and released without latching anything else in between. We know
+ that dict_sys->mutex (SYNC_DICT) is not acquired when
+ row_update_statistics_if_needed() is called and it may be acquired
+ inside that function (thus a level <=SYNC_DICT would do).
+ 3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT)
+ and dict_operation_lock (SYNC_DICT_OPERATION) have been locked
+ (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do)
+ So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */
+ mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
+ SYNC_STATS_AUTO_RECALC);
+
+ dict_stats_recalc_pool_init();
+}
+/* @} */
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited.
+dict_stats_thread_deinit() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_deinit()
+/*======================*/
+{
+ ut_a(!srv_read_only_mode);
+ ut_ad(!srv_dict_stats_thread_active);
+
+ dict_stats_recalc_pool_deinit();
+
+ mutex_free(&recalc_pool_mutex);
+ memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
+
+ os_event_free(dict_stats_event);
+ dict_stats_event = NULL;
+}
+/* @} */
+
+/*****************************************************************//**
+Get the first table that has been added for auto recalc and, if enough
+time has passed since its last recalc, update its stats.
+dict_stats_process_entry_from_recalc_pool() @{ */
+static
+void
+dict_stats_process_entry_from_recalc_pool()
+/*=======================================*/
+{
+ table_id_t table_id;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* pop the first table from the auto recalc pool */
+ if (!dict_stats_recalc_pool_get(&table_id)) {
+ /* no tables for auto recalc */
+ return;
+ }
+
+ dict_table_t* table;
+
+ mutex_enter(&dict_sys->mutex);
+
+ table = dict_table_open_on_id(table_id, TRUE, FALSE);
+
+ if (table == NULL) {
+ /* table does not exist, must have been DROPped
+ after its id was enqueued */
+ mutex_exit(&dict_sys->mutex);
+ return;
+ }
+
+ /* Check whether table is corrupted */
+ if (table->corrupted) {
+ dict_table_close(table, TRUE, FALSE);
+ mutex_exit(&dict_sys->mutex);
+ return;
+ }
+
+ table->stats_bg_flag = BG_STAT_IN_PROGRESS;
+
+ mutex_exit(&dict_sys->mutex);
+
+ /* ut_time() could be expensive, the current function
+ is called once every time a table has been changed more than 10% and
+ on a system with lots of small tables, this could become hot. If we
+ find out that this is a problem, then the check below could eventually
+ be replaced with something else, though a time interval is the natural
+ approach. */
+
+ if (ut_difftime(ut_time(), table->stats_last_recalc)
+ < MIN_RECALC_INTERVAL) {
+
+ /* Stats were (re)calculated not long ago. To avoid
+ too frequent stats updates we put back the table on
+ the auto recalc list and do nothing. */
+
+ dict_stats_recalc_pool_add(table);
+
+ } else {
+
+ dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
+ }
+
+ mutex_enter(&dict_sys->mutex);
+
+ table->stats_bg_flag = BG_STAT_NONE;
+
+ dict_table_close(table, TRUE, FALSE);
+
+ mutex_exit(&dict_sys->mutex);
+}
+/* @} */
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, recalculating their
+statistics when needed.
+dict_stats_thread() @{
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by os_thread_create */
+{
+ ut_a(!srv_read_only_mode);
+
+ srv_dict_stats_thread_active = TRUE;
+
+ while (!SHUTTING_DOWN()) {
+
+ /* Wake up periodically even if not signaled. This is
+ because we may lose an event - if the below call to
+ dict_stats_process_entry_from_recalc_pool() puts the entry back
+ in the list, the os_event_set() will be lost by the subsequent
+ os_event_reset(). */
+ os_event_wait_time(
+ dict_stats_event, MIN_RECALC_INTERVAL * 1000000);
+
+ if (SHUTTING_DOWN()) {
+ break;
+ }
+
+ dict_stats_process_entry_from_recalc_pool();
+
+ os_event_reset(dict_stats_event);
+ }
+
+ srv_dict_stats_thread_active = FALSE;
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit instead of return(). */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+/* @} */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 2e6835fe0c0..a89875352c6 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,6 +25,9 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0fil.h"
+#include <debug_sync.h>
+#include <my_dbug.h>
+
#include "mem0mem.h"
#include "hash0hash.h"
#include "os0file.h"
@@ -41,7 +44,7 @@ Created 10/25/1995 Heikki Tuuri
#include "page0page.h"
#include "page0zip.h"
#include "trx0sys.h"
-#include "buf0rea.h"
+#include "row0mysql.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
@@ -138,7 +141,7 @@ UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
#endif /* UNIV_PFS_RWLOCK */
/** File node of a tablespace or the log data space */
-struct fil_node_struct {
+struct fil_node_t {
fil_space_t* space; /*!< backpointer to the space where this node
belongs */
char* name; /*!< path to the file */
@@ -172,11 +175,11 @@ struct fil_node_struct {
ulint magic_n;/*!< FIL_NODE_MAGIC_N */
};
-/** Value of fil_node_struct::magic_n */
+/** Value of fil_node_t::magic_n */
#define FIL_NODE_MAGIC_N 89389
/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_struct {
+struct fil_space_t {
char* name; /*!< space name = the path to the first file in
it */
ulint id; /*!< space id */
@@ -215,7 +218,8 @@ struct fil_space_struct {
last incomplete megabytes in data files may be
ignored if space == 0 */
ulint flags; /*!< tablespace flags; see
- fsp_flags_validate(), fsp_flags_get_zip_size() */
+ fsp_flags_is_valid(),
+ fsp_flags_get_zip_size() */
ulint n_reserved_extents;
/*!< number of reserved free extents for
ongoing operations like B-tree page split */
@@ -238,26 +242,23 @@ struct fil_space_struct {
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
/*!< list of spaces with at least one unflushed
file we have written to */
- ibool is_in_unflushed_spaces; /*!< TRUE if this space is
- currently in unflushed_spaces */
+ bool is_in_unflushed_spaces;
+ /*!< true if this space is currently in
+ unflushed_spaces */
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
-/** Value of fil_space_struct::magic_n */
+/** Value of fil_space_t::magic_n */
#define FIL_SPACE_MAGIC_N 89472
-/** The tablespace memory cache */
-typedef struct fil_system_struct fil_system_t;
-
/** The tablespace memory cache; also the totality of logs (the log
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
-
-struct fil_system_struct {
+struct fil_system_t {
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< The mutex protecting the cache */
+ ib_mutex_t mutex; /*!< The mutex protecting the cache */
#endif /* !UNIV_HOTBACKUP */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
@@ -313,7 +314,17 @@ initialized. */
static fil_system_t* fil_system = NULL;
/** Determine if (i) is a user tablespace id or not. */
-# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces)
+# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open)
+
+/** Determine if user has explicitly disabled fsync(). */
+#ifndef __WIN__
+# define fil_buffering_disabled(s) \
+ ((s)->purpose == FIL_TABLESPACE \
+ && srv_unix_file_flush_method \
+ == SRV_UNIX_O_DIRECT_NO_FSYNC)
+#else /* __WIN__ */
+# define fil_buffering_disabled(s) (0)
+#endif /* __WIN__ */
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
@@ -384,16 +395,6 @@ fil_node_complete_io(
the node as modified if
type == OS_FILE_WRITE */
/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name); /*!< in: table name in the standard
- 'databasename/tablename' format */
-/*******************************************************************//**
Frees a space object from the tablespace memory cache. Closes the files in
the chain but does not delete them. There must not be any pending i/o's or
flushes on the files.
@@ -412,7 +413,7 @@ calculating the byte offset within a space.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
-ulint
+dberr_t
fil_read(
/*=====*/
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -441,7 +442,7 @@ calculating the byte offset within a space.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
-ulint
+dberr_t
fil_write(
/*======*/
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -459,6 +460,8 @@ fil_write(
void* message) /*!< in: message for aio handler if non-sync
aio used, else ignored */
{
+ ut_ad(!srv_read_only_mode);
+
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
byte_offset, len, buf, message));
}
@@ -592,9 +595,9 @@ fil_space_get_type(
/**********************************************************************//**
Checks if all the file nodes in a space are flushed. The caller must hold
the fil_system mutex.
-@return TRUE if all are flushed */
+@return true if all are flushed */
static
-ibool
+bool
fil_space_is_flushed(
/*=================*/
fil_space_t* space) /*!< in: space */
@@ -608,19 +611,21 @@ fil_space_is_flushed(
while (node) {
if (node->modification_counter > node->flush_counter) {
- return(FALSE);
+ ut_ad(!fil_buffering_disabled(space));
+ return(false);
}
node = UT_LIST_GET_NEXT(chain, node);
}
- return(TRUE);
+ return(true);
}
/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
UNIV_INTERN
-void
+char*
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
@@ -663,7 +668,7 @@ fil_node_create(
mutex_exit(&fil_system->mutex);
- return;
+ return(NULL);
}
space->size += size;
@@ -678,6 +683,8 @@ fil_node_create(
}
mutex_exit(&fil_system->mutex);
+
+ return(node->name);
}
/********************************************************************//**
@@ -718,7 +725,7 @@ fil_node_open_file(
OS_FILE_READ_ONLY, &success);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -798,9 +805,9 @@ fil_node_open_file(
!= page_size)) {
fprintf(stderr,
"InnoDB: Error: tablespace file %s"
- " has page size %lx\n"
+ " has page size 0x%lx\n"
"InnoDB: but the data dictionary"
- " expects page size %lx!\n",
+ " expects page size 0x%lx!\n",
node->name, flags,
fsp_flags_get_page_size(space->flags));
@@ -809,9 +816,9 @@ fil_node_open_file(
if (UNIV_UNLIKELY(space->flags != flags)) {
fprintf(stderr,
- "InnoDB: Error: table flags are %lx"
+ "InnoDB: Error: table flags are 0x%lx"
" in the data dictionary\n"
- "InnoDB: but the flags in file %s are %lx!\n",
+ "InnoDB: but the flags in file %s are 0x%lx!\n",
space->flags, node->name, flags);
ut_error;
@@ -971,6 +978,7 @@ fil_try_to_close_file_in_LRU(
", because mod_count %ld != fl_count %ld\n",
(long) node->modification_counter,
(long) node->flush_counter);
+
}
if (node->being_extended) {
@@ -1143,10 +1151,15 @@ fil_node_free(
node->modification_counter = node->flush_counter;
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
+ if (fil_buffering_disabled(space)) {
+
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
- space->is_in_unflushed_spaces = FALSE;
+ } else if (space->is_in_unflushed_spaces
+ && fil_space_is_flushed(space)) {
+
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces,
system->unflushed_spaces,
@@ -1215,82 +1228,50 @@ fil_space_create(
{
fil_space_t* space;
- fsp_flags_validate(flags);
-
-try_again:
- /*printf(
- "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
- purpose);*/
+ DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
ut_a(fil_system);
- ut_a(name);
+ ut_a(fsp_flags_is_valid(flags));
- mutex_enter(&fil_system->mutex);
+ /* Look for a matching tablespace and if found free it. */
+ do {
+ mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_name(name);
+ space = fil_space_get_by_name(name);
- if (UNIV_LIKELY_NULL(space)) {
- ibool success;
- ulint namesake_id;
+ if (space != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Tablespace '%s' exists in the cache "
+ "with id %lu", name, (ulong) id);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to init to the"
- " tablespace memory cache\n"
- "InnoDB: a tablespace %lu of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: but a tablespace %lu of the same name\n"
- "InnoDB: already exists in the"
- " tablespace memory cache!\n",
- (ulong) space->id);
+ if (id == 0 || purpose != FIL_TABLESPACE) {
- if (id == 0 || purpose != FIL_TABLESPACE) {
+ mutex_exit(&fil_system->mutex);
- mutex_exit(&fil_system->mutex);
+ return(FALSE);
+ }
- return(FALSE);
- }
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Freeing existing tablespace '%s' entry "
+ "from the cache with id %lu",
+ name, (ulong) id);
- fprintf(stderr,
- "InnoDB: We assume that InnoDB did a crash recovery,"
- " and you had\n"
- "InnoDB: an .ibd file for which the table"
- " did not exist in the\n"
- "InnoDB: InnoDB internal data dictionary in the"
- " ibdata files.\n"
- "InnoDB: We assume that you later removed the"
- " .ibd and .frm files,\n"
- "InnoDB: and are now trying to recreate the table."
- " We now remove the\n"
- "InnoDB: conflicting tablespace object"
- " from the memory cache and try\n"
- "InnoDB: the init again.\n");
-
- namesake_id = space->id;
-
- success = fil_space_free(namesake_id, FALSE);
- ut_a(success);
+ ibool success = fil_space_free(space->id, FALSE);
+ ut_a(success);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system->mutex);
+ }
- goto try_again;
- }
+ } while (space != 0);
space = fil_space_get_by_id(id);
- if (UNIV_LIKELY_NULL(space)) {
- fprintf(stderr,
- "InnoDB: Error: trying to add tablespace %lu"
- " of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: to the tablespace memory cache,"
- " but tablespace\n"
- "InnoDB: %lu of name ", (ulong) space->id);
- ut_print_filename(stderr, space->name);
- fputs(" already exists in the tablespace\n"
- "InnoDB: memory cache!\n", stderr);
+ if (space != 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to add tablespace '%s' with id %lu "
+ "to the tablespace memory cache, but tablespace '%s' "
+ "with id %lu already exists in the cache!",
+ name, (ulong) id, space->name, (ulong) space->id);
mutex_exit(&fil_system->mutex);
@@ -1306,15 +1287,15 @@ try_again:
space->tablespace_version = fil_system->tablespace_version;
space->mark = FALSE;
- if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
- && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
+ if (purpose == FIL_TABLESPACE && !recv_recovery_on
+ && id > fil_system->max_assigned_id) {
+
if (!fil_system->space_id_reuse_warned) {
fil_system->space_id_reuse_warned = TRUE;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: allocated tablespace %lu,"
- " old maximum was %lu\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Allocated tablespace %lu, old maximum "
+ "was %lu",
(ulong) id,
(ulong) fil_system->max_assigned_id);
}
@@ -1333,7 +1314,7 @@ try_again:
HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(name), space);
- space->is_in_unflushed_spaces = FALSE;
+ space->is_in_unflushed_spaces = false;
UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
@@ -1418,7 +1399,6 @@ fil_space_free(
{
fil_space_t* space;
fil_space_t* fnamespace;
- fil_node_t* fil_node;
ut_ad(mutex_own(&fil_system->mutex));
@@ -1444,7 +1424,9 @@ fil_space_free(
ut_fold_string(space->name), space);
if (space->is_in_unflushed_spaces) {
- space->is_in_unflushed_spaces = FALSE;
+
+ ut_ad(!fil_buffering_disabled(space));
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
space);
@@ -1455,12 +1437,11 @@ fil_space_free(
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(0 == space->n_pending_flushes);
- fil_node = UT_LIST_GET_FIRST(space->chain);
+ for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
+ fil_node != NULL;
+ fil_node = UT_LIST_GET_FIRST(space->chain)) {
- while (fil_node != NULL) {
fil_node_free(fil_node, fil_system, space);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
}
ut_a(0 == UT_LIST_GET_LEN(space->chain));
@@ -1478,34 +1459,30 @@ fil_space_free(
}
/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
+Returns a pointer to the file_space_t that is in the memory cache
+associated with a space id. The caller must lock fil_system->mutex.
+@return file_space_t pointer, NULL if space not found */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_space(
+/*================*/
ulint id) /*!< in: space id */
{
- fil_node_t* node;
fil_space_t* space;
- ulint size;
+ fil_node_t* node;
ut_ad(fil_system);
- fil_mutex_enter_and_prepare_for_io(id);
-
space = fil_space_get_by_id(id);
-
if (space == NULL) {
- mutex_exit(&fil_system->mutex);
-
- return(0);
+ return(NULL);
}
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
ut_a(id != 0);
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
ut_a(1 == UT_LIST_GET_LEN(space->chain));
node = UT_LIST_GET_FIRST(space->chain);
@@ -1518,7 +1495,69 @@ fil_space_get_size(
fil_node_complete_io(node, fil_system, OS_FILE_READ);
}
- size = space->size;
+ return(space);
+}
+
+/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return own: A copy of fil_node_t::path, NULL if space ID is zero
+or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+ fil_node_t* node;
+ char* path;
+
+ ut_ad(fil_system);
+ ut_a(id);
+
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ space = fil_space_get_space(id);
+
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
+
+ return(NULL);
+ }
+
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ path = mem_strdup(node->name);
+
+ mutex_exit(&fil_system->mutex);
+
+ return(path);
+}
+
+/*******************************************************************//**
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache.
+@return space size, 0 if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_size(
+/*===============*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+ ulint size;
+
+ ut_ad(fil_system);
+
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ space = fil_space_get_space(id);
+
+ size = space ? space->size : 0;
mutex_exit(&fil_system->mutex);
@@ -1535,19 +1574,18 @@ fil_space_get_flags(
/*================*/
ulint id) /*!< in: space id */
{
- fil_node_t* node;
fil_space_t* space;
ulint flags;
ut_ad(fil_system);
- if (UNIV_UNLIKELY(!id)) {
+ if (!id) {
return(0);
}
fil_mutex_enter_and_prepare_for_io(id);
- space = fil_space_get_by_id(id);
+ space = fil_space_get_space(id);
if (space == NULL) {
mutex_exit(&fil_system->mutex);
@@ -1555,21 +1593,6 @@ fil_space_get_flags(
return(ULINT_UNDEFINED);
}
- if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
- ut_a(id != 0);
-
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- fil_node_prepare_for_io(node, fil_system, space);
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- }
-
flags = space->flags;
mutex_exit(&fil_system->mutex);
@@ -1744,6 +1767,49 @@ fil_close_all_files(void)
}
/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+ bool free) /*!< in: whether to free the memory object */
+{
+ fil_space_t* space;
+
+ mutex_enter(&fil_system->mutex);
+
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+ while (space != NULL) {
+ fil_node_t* node;
+ fil_space_t* prev_space = space;
+
+ if (space->purpose != FIL_LOG) {
+ space = UT_LIST_GET_NEXT(space_list, space);
+ continue;
+ }
+
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ if (node->open) {
+ fil_node_close_file(node, fil_system);
+ }
+ }
+
+ space = UT_LIST_GET_NEXT(space_list, space);
+
+ if (free) {
+ fil_space_free(prev_space->id, FALSE);
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+}
+
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
@@ -1773,8 +1839,8 @@ fil_set_max_space_id_if_bigger(
Writes the flushed lsn and the latest archived log number to the page header
of the first page of a data file of the system tablespace (space 0),
which is uncompressed. */
-static
-ulint
+static __attribute__((warn_unused_result))
+dberr_t
fil_write_lsn_and_arch_no_to_file(
/*==============================*/
ulint space, /*!< in: space to write to */
@@ -1786,19 +1852,23 @@ fil_write_lsn_and_arch_no_to_file(
{
byte* buf1;
byte* buf;
+ dberr_t err;
buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE));
buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
- fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+ err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
+ UNIV_PAGE_SIZE, buf, NULL);
+ if (err == DB_SUCCESS) {
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
- fil_write(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+ err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
+ UNIV_PAGE_SIZE, buf, NULL);
+ }
mem_free(buf1);
- return(DB_SUCCESS);
+ return(err);
}
/****************************************************************//**
@@ -1806,7 +1876,7 @@ Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_write_flushed_lsn_to_data_files(
/*================================*/
lsn_t lsn, /*!< in: lsn to write */
@@ -1814,7 +1884,7 @@ fil_write_flushed_lsn_to_data_files(
{
fil_space_t* space;
fil_node_t* node;
- ulint err;
+ dberr_t err;
mutex_enter(&fil_system->mutex);
@@ -1830,7 +1900,6 @@ fil_write_flushed_lsn_to_data_files(
if (space->purpose == FIL_TABLESPACE
&& !fil_is_user_tablespace_id(space->id)) {
-
ulint sum_of_sizes = 0;
for (node = UT_LIST_GET_FIRST(space->chain);
@@ -1872,6 +1941,7 @@ fil_read_first_page(
parameters below already
contain sensible data */
ulint* flags, /*!< out: tablespace flags */
+ ulint* space_id, /*!< out: tablespace ID */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /*!< out: min of archived
log numbers in data files */
@@ -1897,7 +1967,9 @@ fil_read_first_page(
*flags = fsp_header_get_flags(page);
- flushed_lsn = mach_read_from_8(page+ FIL_PAGE_FILE_FLUSH_LSN);
+ *space_id = fsp_header_get_space_id(page);
+
+ flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
ut_free(buf);
@@ -2102,6 +2174,12 @@ created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
datadir that we should use in replaying the file operations.
+
+InnoDB recovery does not replay these fully since it always sets the space id
+to zero. But ibbackup does replay them. TODO: If remote tablespaces are used,
+ibbackup will only create tables in the default directory since MLOG_FILE_CREATE
+and MLOG_FILE_CREATE2 only know the tablename, not the path.
+
@return end of log record, or NULL if the record was not completely
contained between ptr and end_ptr */
UNIV_INTERN
@@ -2197,7 +2275,9 @@ fil_op_log_parse_or_replay(
switch (type) {
case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) {
- ut_a(fil_delete_tablespace(space_id));
+ dberr_t err = fil_delete_tablespace(
+ space_id, BUF_REMOVE_FLUSH_NO_WRITE);
+ ut_a(err == DB_SUCCESS);
}
break;
@@ -2218,10 +2298,10 @@ fil_op_log_parse_or_replay(
if (fil_get_space_id_for_table(new_name)
== ULINT_UNDEFINED) {
- /* We do not care of the old name, that is
- why we pass NULL as the first argument */
+ /* We do not care about the old name, that
+ is why we pass NULL as the first argument. */
if (!fil_rename_tablespace(NULL, space_id,
- new_name)) {
+ new_name, NULL)) {
ut_error;
}
}
@@ -2239,12 +2319,14 @@ fil_op_log_parse_or_replay(
} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
/* Temporary table, do nothing */
} else {
+ const char* path = NULL;
+
/* Create the database directory for name, if it does
not exist yet */
fil_create_directory_for_tablename(name);
if (fil_create_new_single_table_tablespace(
- space_id, name, FALSE, flags,
+ space_id, name, path, flags,
DICT_TF2_USE_TABLESPACE,
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
ut_error;
@@ -2261,118 +2343,271 @@ fil_op_log_parse_or_replay(
}
/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_delete_tablespace(
-/*==================*/
- ulint id) /*!< in: space id */
+Allocates a file name for the EXPORT/IMPORT config file name. The
+string must be freed by caller with mem_free().
+@return own: file name */
+static
+char*
+fil_make_cfg_name(
+/*==============*/
+ const char* filepath) /*!< in: .ibd file name */
{
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
+ char* cfg_name;
- ut_a(id != 0);
-stop_new_ops:
- mutex_enter(&fil_system->mutex);
+ /* Create a temporary file path by replacing the .ibd suffix
+ with .cfg. */
- space = fil_space_get_by_id(id);
+ ut_ad(strlen(filepath) > 4);
- if (space != NULL) {
- space->stop_new_ops = TRUE;
+ cfg_name = mem_strdup(filepath);
+ ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
+ return(cfg_name);
+}
- if (space->n_pending_ops == 0) {
- mutex_exit(&fil_system->mutex);
+/*******************************************************************//**
+Check for change buffer merges.
+@return 0 if no merges else count + 1. */
+static
+ulint
+fil_ibuf_check_pending_ops(
+/*=======================*/
+ fil_space_t* space, /*!< in/out: Tablespace to check */
+ ulint count) /*!< in: number of attempts so far */
+{
+ ut_ad(mutex_own(&fil_system->mutex));
- count = 0;
+ if (space != 0 && space->n_pending_ops != 0) {
- goto try_again;
- } else {
- if (count > 5000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu pending"
- " operations (most likely ibuf merges)"
- " on it.\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_ops,
- (ulong) count);
- }
+ if (count > 5000) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Trying to close/delete tablespace "
+ "'%s' but there are %lu pending change "
+ "buffer merges on it.",
+ space->name,
+ (ulong) space->n_pending_ops);
+ }
- mutex_exit(&fil_system->mutex);
+ return(count + 1);
+ }
- os_thread_sleep(20000);
- count++;
+ return(0);
+}
+
+/*******************************************************************//**
+Check for pending IO.
+@return 0 if no pending else count + 1. */
+static
+ulint
+fil_check_pending_io(
+/*=================*/
+ fil_space_t* space, /*!< in/out: Tablespace to check */
+ fil_node_t** node, /*!< out: Node in space list */
+ ulint count) /*!< in: number of attempts so far */
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_a(space->n_pending_ops == 0);
+
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ *node = UT_LIST_GET_FIRST(space->chain);
+
+ if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
+
+ ut_a(!(*node)->being_extended);
- goto stop_new_ops;
+ if (count > 1000) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Trying to close/delete tablespace '%s' "
+ "but there are %lu flushes "
+ " and %lu pending i/o's on it.",
+ space->name,
+ (ulong) space->n_pending_flushes,
+ (ulong) (*node)->n_pending);
}
+
+ return(count + 1);
}
- mutex_exit(&fil_system->mutex);
- count = 0;
+ return(0);
+}
+
+/*******************************************************************//**
+Check pending operations on a tablespace.
+@return DB_SUCCESS or error failure. */
+static
+dberr_t
+fil_check_pending_operations(
+/*=========================*/
+ ulint id, /*!< in: space id */
+ fil_space_t** space, /*!< out: tablespace instance in memory */
+ char** path) /*!< out/own: tablespace path */
+{
+ ulint count = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+ ut_ad(space);
+
+ *space = 0;
-try_again:
mutex_enter(&fil_system->mutex);
+ fil_space_t* sp = fil_space_get_by_id(id);
+ if (sp) {
+ sp->stop_new_ops = TRUE;
+ }
+ mutex_exit(&fil_system->mutex);
- space = fil_space_get_by_id(id);
+ /* Check for pending change buffer merges. */
- if (space == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot delete tablespace %lu\n"
- "InnoDB: because it is not found in the"
- " tablespace memory cache.\n",
- (ulong) id);
+ do {
+ mutex_enter(&fil_system->mutex);
+
+ sp = fil_space_get_by_id(id);
+
+ count = fil_ibuf_check_pending_ops(sp, count);
mutex_exit(&fil_system->mutex);
- return(FALSE);
- }
+ if (count > 0) {
+ os_thread_sleep(20000);
+ }
- ut_a(space->stop_new_ops);
- ut_a(space->n_pending_ops == 0);
+ } while (count > 0);
- /* TODO: The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ /* Check for pending IO. */
- node = UT_LIST_GET_FIRST(space->chain);
+ *path = 0;
- if (space->n_pending_flushes > 0 || node->n_pending > 0
- || node->being_extended) {
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu flushes"
- " and %lu pending i/o's on it\n"
- "InnoDB: Or it is being extended\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_flushes,
- (ulong) node->n_pending,
- (ulong) count);
+ do {
+ mutex_enter(&fil_system->mutex);
+
+ sp = fil_space_get_by_id(id);
+
+ if (sp == NULL) {
+ mutex_exit(&fil_system->mutex);
+ return(DB_TABLESPACE_NOT_FOUND);
+ }
+
+ fil_node_t* node;
+
+ count = fil_check_pending_io(sp, &node, count);
+
+ if (count == 0) {
+ *path = mem_strdup(node->name);
}
+
mutex_exit(&fil_system->mutex);
- os_thread_sleep(20000);
- count++;
+ if (count > 0) {
+ os_thread_sleep(20000);
+ }
+
+ } while (count > 0);
+
+ ut_ad(sp);
+
+ *space = sp;
+ return(DB_SUCCESS);
+}
+
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+ trx_t* trx, /*!< in/out: Transaction covering the close */
+ ulint id) /*!< in: space id */
+{
+ char* path = 0;
+ fil_space_t* space = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+
+ dberr_t err = fil_check_pending_operations(id, &space, &path);
- goto try_again;
+ if (err != DB_SUCCESS) {
+ return(err);
}
- path = mem_strdup(node->name);
+ ut_a(space);
+ ut_a(path != 0);
+
+ rw_lock_x_lock(&space->latch);
+
+#ifndef UNIV_HOTBACKUP
+ /* Invalidate in the buffer pool all pages belonging to the
+ tablespace. Since we have set space->stop_new_ops = TRUE, readahead
+ or ibuf merge can no longer read more pages of this tablespace to the
+ buffer pool. Thus we can clean the tablespace out of the buffer pool
+ completely and permanently. The flag stop_new_ops also prevents
+ fil_flush() from being applied to this tablespace. */
+
+ buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
+#endif
+ mutex_enter(&fil_system->mutex);
+
+ /* If the free is successful, the X lock will be released before
+ the space memory data structure is freed. */
+
+ if (!fil_space_free(id, TRUE)) {
+ rw_lock_x_unlock(&space->latch);
+ err = DB_TABLESPACE_NOT_FOUND;
+ } else {
+ err = DB_SUCCESS;
+ }
mutex_exit(&fil_system->mutex);
+ /* If it is a delete then also delete any generated files, otherwise
+ when we drop the database the remove directory will fail. */
+
+ char* cfg_name = fil_make_cfg_name(path);
+
+ os_file_delete_if_exists(cfg_name);
+
+ mem_free(path);
+ mem_free(cfg_name);
+
+ return(err);
+}
+
+/*******************************************************************//**
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_delete_tablespace(
+/*==================*/
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove) /*!< in: specify the action to take
+ on the tables pages in the buffer
+ pool */
+{
+ char* path = 0;
+ fil_space_t* space = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+
+ dberr_t err = fil_check_pending_operations(id, &space, &path);
+
+ if (err != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot delete tablespace %lu because it is not "
+ "found in the tablespace memory cache.",
+ (ulong) id);
+
+ return(err);
+ }
+
+ ut_a(space);
+ ut_a(path != 0);
+
/* Important: We rely on the data dictionary mutex to ensure
that a race is not possible here. It should serialize the tablespace
drop/free. We acquire an X latch only to avoid a race condition
@@ -2407,9 +2642,22 @@ try_again:
To deal with potential read requests by checking the
::stop_new_ops flag in fil_io() */
- buf_LRU_invalidate_tablespace(id);
-#endif
- /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
+ buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
+
+#endif /* !UNIV_HOTBACKUP */
+
+ /* If it is a delete then also delete any generated files, otherwise
+ when we drop the database the remove directory will fail. */
+ {
+ char* cfg_name = fil_make_cfg_name(path);
+ os_file_delete_if_exists(cfg_name);
+ mem_free(cfg_name);
+ }
+
+ /* Delete the link file pointing to the ibd file we are deleting. */
+ if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
+ fil_delete_link_file(space->name);
+ }
mutex_enter(&fil_system->mutex);
@@ -2418,25 +2666,27 @@ try_again:
if (fil_space_get_by_id(id)) {
ut_a(space->n_pending_ops == 0);
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_a(node->n_pending == 0);
}
- success = fil_space_free(id, TRUE);
+ if (!fil_space_free(id, TRUE)) {
+ err = DB_TABLESPACE_NOT_FOUND;
+ }
mutex_exit(&fil_system->mutex);
- if (success) {
- success = os_file_delete(path);
-
- if (!success) {
- success = os_file_delete_if_exists(path);
- }
- } else {
+ if (err != DB_SUCCESS) {
rw_lock_x_unlock(&space->latch);
+ } else if (!os_file_delete(path) && !os_file_delete_if_exists(path)) {
+
+ /* Note: This is because we have removed the
+ tablespace instance from the cache. */
+
+ err = DB_IO_ERROR;
}
- if (success) {
+ if (err == DB_SUCCESS) {
#ifndef UNIV_HOTBACKUP
/* Write a log record about the deletion of the .ibd
file, so that ibbackup can replay it in the
@@ -2451,14 +2701,12 @@ try_again:
fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
mtr_commit(&mtr);
#endif
- mem_free(path);
-
- return(TRUE);
+ err = DB_SUCCESS;
}
mem_free(path);
- return(FALSE);
+ return(err);
}
/*******************************************************************//**
@@ -2490,36 +2738,49 @@ fil_tablespace_is_being_deleted(
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+ in DROP TABLE they are only removed gradually in the background;
+
+ 3. Free all the pages in use by the tablespace.
+@return DB_SUCCESS or error */
UNIV_INTERN
-ibool
+dberr_t
fil_discard_tablespace(
/*===================*/
ulint id) /*!< in: space id */
{
- ibool success;
+ dberr_t err;
- success = fil_delete_tablespace(id);
+ switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
+ case DB_SUCCESS:
+ break;
- if (!success) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete tablespace %lu"
- " in DISCARD TABLESPACE.\n"
- "InnoDB: But let us remove the"
- " insert buffer entries for this tablespace.\n",
- (ulong) id);
+ case DB_IO_ERROR:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "While deleting tablespace %lu in DISCARD TABLESPACE."
+ " File rename/delete failed: %s",
+ (ulong) id, ut_strerr(err));
+ break;
+
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot delete tablespace %lu in DISCARD "
+ "TABLESPACE. %s",
+ (ulong) id, ut_strerr(err));
+ break;
+
+ default:
+ ut_error;
}
/* Remove all insert buffer entries for the tablespace */
ibuf_delete_for_discarded_space(id);
- return(success);
+ return(err);
}
#endif /* !UNIV_HOTBACKUP */
@@ -2575,30 +2836,27 @@ fil_rename_tablespace_in_mem(
Allocates a file name for a single-table tablespace. The string must be freed
by caller with mem_free().
@return own: file name */
-static
+UNIV_INTERN
char*
fil_make_ibd_name(
/*==============*/
- const char* name, /*!< in: table name or a dir path of a
- TEMPORARY table */
- ibool is_temp) /*!< in: TRUE if it is a dir path */
+ const char* name, /*!< in: table name or a dir path */
+ bool is_full_path) /*!< in: TRUE if it is a dir path */
{
char* filename;
ulint namelen = strlen(name);
ulint dirlen = strlen(fil_path_to_mysql_datadir);
+ ulint pathlen = dirlen + namelen + sizeof "/.ibd";
- filename = static_cast<char*>(
- mem_alloc(namelen + dirlen + sizeof "/.ibd"));
+ filename = static_cast<char*>(mem_alloc(pathlen));
- if (is_temp) {
+ if (is_full_path) {
memcpy(filename, name, namelen);
memcpy(filename + namelen, ".ibd", sizeof ".ibd");
} else {
- memcpy(filename, fil_path_to_mysql_datadir, dirlen);
- filename[dirlen] = '/';
+ ut_snprintf(filename, pathlen, "%s/%s.ibd",
+ fil_path_to_mysql_datadir, name);
- memcpy(filename + dirlen + 1, name, namelen);
- memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
}
srv_normalize_path_for_win(filename);
@@ -2607,6 +2865,31 @@ fil_make_ibd_name(
}
/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+ const char* name) /*!< in: table name */
+{
+ char* filename;
+ ulint namelen = strlen(name);
+ ulint dirlen = strlen(fil_path_to_mysql_datadir);
+ ulint pathlen = dirlen + namelen + sizeof "/.isl";
+
+ filename = static_cast<char*>(mem_alloc(pathlen));
+
+ ut_snprintf(filename, pathlen, "%s/%s.isl",
+ fil_path_to_mysql_datadir, name);
+
+ srv_normalize_path_for_win(filename);
+
+ return(filename);
+}
+
+/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
tablespace memory cache.
@return TRUE if success */
@@ -2614,14 +2897,19 @@ UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- const char* old_name_in, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
+ const char* old_name_in, /*!< in: old table name in the
+ standard databasename/tablename
+ format of InnoDB, or NULL if we
+ do the rename based on the space
+ id only */
ulint id, /*!< in: space id */
- const char* new_name) /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
+ const char* new_name, /*!< in: new table name in the
+ standard databasename/tablename
+ format of InnoDB */
+ const char* new_path_in) /*!< in: new full datafile path
+ if the tablespace is remotely
+ located, or NULL if it is located
+ in the normal data directory. */
{
ibool success;
fil_space_t* space;
@@ -2651,14 +2939,14 @@ retry:
space = fil_space_get_by_id(id);
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
+
if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: cannot find space id %lu"
- " in the tablespace memory cache\n"
- "InnoDB: though the table ", (ulong) id);
- ut_print_filename(stderr,
- old_name_in ? old_name_in : not_given);
- fputs(" in a rename operation should have that id\n", stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot find space id %lu in the tablespace "
+ "memory cache, though the table '%s' in a "
+ "rename operation should have that id.",
+ (ulong) id, old_name_in ? old_name_in : not_given);
mutex_exit(&fil_system->mutex);
return(FALSE);
@@ -2677,10 +2965,13 @@ retry:
space->stop_ios = TRUE;
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
- if (node->n_pending > 0 || node->n_pending_flushes > 0
+ if (node->n_pending > 0
+ || node->n_pending_flushes > 0
|| node->being_extended) {
/* There are pending i/o's or flushes or the file is
currently being extended, sleep for a while and
@@ -2713,24 +3004,31 @@ retry:
if (old_name_in) {
old_name = mem_strdup(old_name_in);
- old_path = fil_make_ibd_name(old_name, FALSE);
-
ut_a(strcmp(space->name, old_name) == 0);
- ut_a(strcmp(node->name, old_path) == 0);
} else {
old_name = mem_strdup(space->name);
- old_path = mem_strdup(node->name);
}
+ old_path = mem_strdup(node->name);
/* Rename the tablespace and the node in the memory cache */
- new_path = fil_make_ibd_name(new_name, FALSE);
+ new_path = new_path_in ? mem_strdup(new_path_in)
+ : fil_make_ibd_name(new_name, false);
+
success = fil_rename_tablespace_in_mem(
space, node, new_name, new_path);
if (success) {
+
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+ goto skip_second_rename; );
+
success = os_file_rename(
innodb_file_data_key, old_path, new_path);
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+skip_second_rename:
+ success = FALSE; );
+
if (!success) {
/* We have to revert the changes we made
to the tablespace memory cache */
@@ -2745,7 +3043,7 @@ retry:
mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
- if (success) {
+ if (success && !recv_recovery_on) {
mtr_t mtr;
mtr_start(&mtr);
@@ -2754,7 +3052,7 @@ retry:
&mtr);
mtr_commit(&mtr);
}
-#endif
+#endif /* !UNIV_HOTBACKUP */
mem_free(new_path);
mem_free(old_path);
@@ -2764,23 +3062,202 @@ retry:
}
/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+ const char* tablename, /*!< in: tablename */
+ const char* filepath) /*!< in: pathname of tablespace */
+{
+ os_file_t file;
+ ibool success;
+ dberr_t err = DB_SUCCESS;
+ char* link_filepath;
+ char* prev_filepath = fil_read_link_file(tablename);
+
+ ut_ad(!srv_read_only_mode);
+
+ if (prev_filepath) {
+ /* Truncate will call this with an existing
+ link file which contains the same filepath. */
+ if (0 == strcmp(prev_filepath, filepath)) {
+ mem_free(prev_filepath);
+ return(DB_SUCCESS);
+ }
+ mem_free(prev_filepath);
+ }
+
+ link_filepath = fil_make_isl_name(tablename);
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, link_filepath,
+ OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
+
+ if (!success) {
+ /* The following call will print an error message */
+ ulint error = os_file_get_last_error(true);
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Cannot create file ", stderr);
+ ut_print_filename(stderr, link_filepath);
+ fputs(".\n", stderr);
+
+ if (error == OS_FILE_ALREADY_EXISTS) {
+ fputs("InnoDB: The link file: ", stderr);
+ ut_print_filename(stderr, filepath);
+ fputs(" already exists.\n", stderr);
+ err = DB_TABLESPACE_EXISTS;
+
+ } else if (error == OS_FILE_DISK_FULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+
+ } else {
+ err = DB_ERROR;
+ }
+
+ /* file is not open, no need to close it. */
+ mem_free(link_filepath);
+ return(err);
+ }
+
+ if (!os_file_write(link_filepath, file, filepath, 0,
+ strlen(filepath))) {
+ err = DB_ERROR;
+ }
+
+ /* Close the file, we only need it at startup */
+ os_file_close(file);
+
+ mem_free(link_filepath);
+
+ return(err);
+}
+
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*=================*/
+ const char* tablename) /*!< in: name of table */
+{
+ char* link_filepath = fil_make_isl_name(tablename);
+
+ os_file_delete_if_exists(link_filepath);
+
+ mem_free(link_filepath);
+}
+
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL. The name is of the
+form {databasename}/{tablename}. and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+ const char* name) /*!< in: tablespace name */
+{
+ char* filepath = NULL;
+ char* link_filepath;
+ FILE* file = NULL;
+
+ /* The .isl file is in the 'normal' tablespace location. */
+ link_filepath = fil_make_isl_name(name);
+
+ file = fopen(link_filepath, "r+b");
+
+ mem_free(link_filepath);
+
+ if (file) {
+ filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
+
+ os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
+ fclose(file);
+
+ if (strlen(filepath)) {
+ /* Trim whitespace from end of filepath */
+ ulint lastch = strlen(filepath) - 1;
+ while (lastch > 4 && filepath[lastch] <= 0x20) {
+ filepath[lastch--] = 0x00;
+ }
+ srv_normalize_path_for_win(filepath);
+ }
+ }
+
+ return(filepath);
+}
+
+/*******************************************************************//**
+Opens a handle to the file linked to in an InnoDB Symbolic Link file.
+@return TRUE if remote linked tablespace file is found and opened. */
+UNIV_INTERN
+ibool
+fil_open_linked_file(
+/*===============*/
+ const char* tablename, /*!< in: database/tablename */
+ char** remote_filepath,/*!< out: remote filepath */
+ os_file_t* remote_file) /*!< out: remote file handle */
+
+{
+ ibool success;
+
+ *remote_filepath = fil_read_link_file(tablename);
+ if (*remote_filepath == NULL) {
+ return(FALSE);
+ }
+
+ /* The filepath provided is different from what was
+ found in the link file. */
+ *remote_file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, *remote_filepath,
+ OS_FILE_OPEN, OS_FILE_READ_ONLY,
+ &success);
+
+ if (!success) {
+ char* link_filepath = fil_make_isl_name(tablename);
+
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "A link file was found named '%s' "
+ "but the linked tablespace '%s' "
+ "could not be opened.",
+ link_filepath, *remote_filepath);
+
+ mem_free(link_filepath);
+ mem_free(*remote_filepath);
+ *remote_filepath = NULL;
+ }
+
+ return(success);
+}
+
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server.
+
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fil_create_new_single_table_tablespace(
/*===================================*/
ulint space_id, /*!< in: space id */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
+ of InnoDB */
+ const char* dir_path, /*!< in: NULL or a dir path */
ulint flags, /*!< in: tablespace flags */
ulint flags2, /*!< in: table flags2 */
ulint size) /*!< in: the initial size of the
@@ -2789,18 +3266,40 @@ fil_create_new_single_table_tablespace(
{
os_file_t file;
ibool ret;
- ulint err;
+ dberr_t err;
byte* buf2;
byte* page;
char* path;
ibool success;
+ /* TRUE if a table is created with CREATE TEMPORARY TABLE */
+ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
+ bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
ut_a(space_id > 0);
+ ut_ad(!srv_read_only_mode);
ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
- fsp_flags_validate(flags);
+ ut_a(fsp_flags_is_valid(flags));
- path = fil_make_ibd_name(tablename, is_temp);
+ if (is_temp) {
+ /* Temporary table filepath */
+ ut_ad(dir_path);
+ path = fil_make_ibd_name(dir_path, true);
+ } else if (has_data_dir) {
+ ut_ad(dir_path);
+ path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
+
+ /* Since this tablespace file will be created in a
+ remote directory, let's create the subdirectories
+ in the path, if they are not there already. */
+ success = os_file_create_subdirs_if_needed(path);
+ if (!success) {
+ err = DB_ERROR;
+ goto error_exit_3;
+ }
+ } else {
+ path = fil_make_ibd_name(tablename, false);
+ }
file = os_file_create(
innodb_file_data_key, path,
@@ -2810,58 +3309,44 @@ fil_create_new_single_table_tablespace(
&ret);
if (ret == FALSE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error creating file ", stderr);
- ut_print_filename(stderr, path);
- fputs(".\n", stderr);
-
/* The following call will print an error message */
-
- err = os_file_get_last_error(TRUE);
-
- if (err == OS_FILE_ALREADY_EXISTS) {
- fputs("InnoDB: The file already exists though"
- " the corresponding table did not\n"
- "InnoDB: exist in the InnoDB data dictionary."
- " Have you moved InnoDB\n"
- "InnoDB: .ibd files around without using the"
- " SQL commands\n"
- "InnoDB: DISCARD TABLESPACE and"
- " IMPORT TABLESPACE, or did\n"
- "InnoDB: mysqld crash in the middle of"
- " CREATE TABLE? You can\n"
- "InnoDB: resolve the problem by"
- " removing the file ", stderr);
- ut_print_filename(stderr, path);
- fputs("\n"
- "InnoDB: under the 'datadir' of MySQL.\n",
- stderr);
-
- mem_free(path);
- return(DB_TABLESPACE_ALREADY_EXISTS);
+ ulint error = os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create file '%s'\n", path);
+
+ if (error == OS_FILE_ALREADY_EXISTS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The file '%s' already exists though the "
+ "corresponding table did not exist "
+ "in the InnoDB data dictionary. "
+ "Have you moved InnoDB .ibd files "
+ "around without using the SQL commands "
+ "DISCARD TABLESPACE and IMPORT TABLESPACE, "
+ "or did mysqld crash in the middle of "
+ "CREATE TABLE? "
+ "You can resolve the problem by removing "
+ "the file '%s' under the 'datadir' of MySQL.",
+ path, path);
+
+ err = DB_TABLESPACE_EXISTS;
+ goto error_exit_3;
}
- if (err == OS_FILE_DISK_FULL) {
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
+ if (error == OS_FILE_DISK_FULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ goto error_exit_3;
}
- mem_free(path);
- return(DB_ERROR);
+ err = DB_ERROR;
+ goto error_exit_3;
}
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
if (!ret) {
err = DB_OUT_OF_FILE_SPACE;
-error_exit:
- os_file_close(file);
-error_exit2:
- os_file_delete(path);
-
- mem_free(path);
- return(err);
+ goto error_exit_2;
}
/* printf("Creating tablespace %s id %lu\n", path, space_id); */
@@ -2910,356 +3395,486 @@ error_exit2:
ut_free(buf2);
if (!ret) {
- fputs("InnoDB: Error: could not write the first page"
- " to tablespace ", stderr);
- ut_print_filename(stderr, path);
- putc('\n', stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not write the first page to tablespace "
+ "'%s'", path);
+
err = DB_ERROR;
- goto error_exit;
+ goto error_exit_2;
}
ret = os_file_flush(file);
if (!ret) {
- fputs("InnoDB: Error: file flush of tablespace ", stderr);
- ut_print_filename(stderr, path);
- fputs(" failed\n", stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File flush of tablespace '%s' failed", path);
err = DB_ERROR;
- goto error_exit;
+ goto error_exit_2;
}
- os_file_close(file);
+ if (has_data_dir) {
+ /* Now that the IBD file is created, make the ISL file. */
+ err = fil_create_link_file(tablename, path);
+ if (err != DB_SUCCESS) {
+ goto error_exit_2;
+ }
+ }
success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
+ if (!success || !fil_node_create(path, size, space_id, FALSE)) {
err = DB_ERROR;
- goto error_exit2;
+ goto error_exit_1;
}
- fil_node_create(path, size, space_id, FALSE);
-
#ifndef UNIV_HOTBACKUP
{
mtr_t mtr;
+ ulint mlog_file_flag = 0;
+
+ if (is_temp) {
+ mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
+ }
mtr_start(&mtr);
fil_op_write_log(flags
? MLOG_FILE_CREATE2
: MLOG_FILE_CREATE,
- space_id,
- is_temp ? MLOG_FILE_FLAG_TEMP : 0,
- flags,
+ space_id, mlog_file_flag, flags,
tablename, NULL, &mtr);
mtr_commit(&mtr);
}
#endif
+ err = DB_SUCCESS;
+
+ /* Error code is set. Cleanup the various variables used.
+ These labels reflect the order in which variables are assigned or
+ actions are done. */
+error_exit_1:
+ if (has_data_dir && err != DB_SUCCESS) {
+ fil_delete_link_file(tablename);
+ }
+error_exit_2:
+ os_file_close(file);
+ if (err != DB_SUCCESS) {
+ os_file_delete(path);
+ }
+error_exit_3:
mem_free(path);
- return(DB_SUCCESS);
+
+ return(err);
}
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- lsn_t current_lsn) /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
+Report information about a bad tablespace. */
+static
+void
+fil_report_bad_tablespace(
+/*======================*/
+ char* filepath, /*!< in: filepath */
+ ulint found_id, /*!< in: found space ID */
+ ulint found_flags, /*!< in: found flags */
+ ulint expected_id, /*!< in: expected space id */
+ ulint expected_flags) /*!< in: expected flags */
{
- os_file_t file;
- char* filepath;
- byte* page;
- byte* buf2;
- lsn_t flush_lsn;
- ulint space_id;
- os_offset_t file_size;
- os_offset_t offset;
- ulint zip_size;
- ibool success;
- page_zip_des_t page_zip;
-
- filepath = fil_make_ibd_name(name, FALSE);
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "In file '%s', tablespace id and flags are %lu and %lu, "
+ "but in the InnoDB data dictionary they are %lu and %lu. "
+ "Have you moved InnoDB .ibd files around without using the "
+ "commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
+ "Please refer to "
+ REFMAN "innodb-troubleshooting-datadict.html "
+ "for how to resolve the issue.",
+ filepath, (ulong) found_id, (ulong) found_flags,
+ (ulong) expected_id, (ulong) expected_flags);
+}
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n", stderr);
- mem_free(filepath);
+struct fsp_open_info {
+ ibool success; /*!< Has the tablespace been opened? */
+ ibool valid; /*!< Is the tablespace valid? */
+ os_file_t file; /*!< File handle */
+ char* filepath; /*!< File path to open */
+ lsn_t lsn; /*!< Flushed LSN from header page */
+ ulint id; /*!< Space ID */
+ ulint flags; /*!< Tablespace flags */
+#ifdef UNIV_LOG_ARCHIVE
+ ulint arch_log_no; /*!< latest archived log file number */
+#endif /* UNIV_LOG_ARCHIVE */
+};
- return(FALSE);
- }
+/********************************************************************//**
+Tries to open a single-table tablespace and optionally checks that the
+space id in it is correct. If this does not succeed, print an error message
+to the .err log. This function is used to open a tablespace when we start
+mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
- /* Read the first page of the tablespace */
+NOTE that we assume this operation is used either at the database startup
+or under the protection of the dictionary mutex, so that two users cannot
+race here. This operation does not leave the file associated with the
+tablespace open, but closes it after we have looked at the space id in it.
- buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file. This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
- if (!success) {
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
- goto func_exit;
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_open_single_table_tablespace(
+/*=============================*/
+ bool validate, /*!< in: Do we validate tablespace? */
+ bool fix_dict, /*!< in: Can we fix the dictionary? */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+ const char* tablename, /*!< in: table name in the
+ databasename/tablename format */
+ const char* path_in) /*!< in: tablespace filepath */
+{
+ dberr_t err = DB_SUCCESS;
+ bool dict_filepath_same_as_default = false;
+ bool link_file_found = false;
+ bool link_file_is_bad = false;
+ fsp_open_info def;
+ fsp_open_info dict;
+ fsp_open_info remote;
+ ulint tablespaces_found = 0;
+ ulint valid_tablespaces_found = 0;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
+
+ if (!fsp_flags_is_valid(flags)) {
+ return(DB_CORRUPTION);
+ }
+
+ /* If the tablespace was relocated, we do not
+ compare the DATA_DIR flag */
+ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
+
+ memset(&def, 0, sizeof(def));
+ memset(&dict, 0, sizeof(dict));
+ memset(&remote, 0, sizeof(remote));
+
+ /* Discover the correct filepath. We will always look for an ibd
+ in the default location. If it is remote, it should not be here. */
+ def.filepath = fil_make_ibd_name(tablename, false);
+
+ /* The path_in was read from SYS_DATAFILES. */
+ if (path_in) {
+ if (strcmp(def.filepath, path_in)) {
+ dict.filepath = mem_strdup(path_in);
+ /* possibility of multiple files. */
+ validate = true;
+ } else {
+ dict_filepath_same_as_default = true;
+ }
}
- /* We have to read the file flush lsn from the header of the file */
-
- flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ link_file_found = fil_open_linked_file(
+ tablename, &remote.filepath, &remote.file);
+ remote.success = link_file_found;
+ if (remote.success) {
+ /* possibility of multiple files. */
+ validate = true;
+ tablespaces_found++;
+
+ /* A link file was found. MySQL does not allow a DATA
+		DIRECTORY to be the same as the default filepath. */
+ ut_a(strcmp(def.filepath, remote.filepath));
+
+ /* If there was a filepath found in SYS_DATAFILES,
+ we hope it was the same as this remote.filepath found
+ in the ISL file. */
+ if (dict.filepath
+ && (0 == strcmp(dict.filepath, remote.filepath))) {
+ remote.success = FALSE;
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ remote.filepath = NULL;
+ tablespaces_found--;
+ }
+ }
- if (current_lsn >= flush_lsn) {
- /* Ok */
- success = TRUE;
+ /* Attempt to open the tablespace at other possible filepaths. */
+ if (dict.filepath) {
+ dict.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &dict.success);
+ if (dict.success) {
+ /* possibility of multiple files. */
+ validate = true;
+ tablespaces_found++;
+ }
+ }
- goto func_exit;
+ /* Always look for a file at the default location. */
+ ut_a(def.filepath);
+ def.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, def.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &def.success);
+ if (def.success) {
+ tablespaces_found++;
}
- space_id = fsp_header_get_space_id(page);
- zip_size = fsp_header_get_zip_size(page);
+ /* We have now checked all possible tablespace locations and
+ have a count of how many we found. If things are normal, we
+ only found 1. */
+ if (!validate && tablespaces_found == 1) {
+ goto skip_validate;
+ }
- page_zip_des_init(&page_zip);
- page_zip_set_size(&page_zip, zip_size);
- if (zip_size) {
- page_zip.data = page + UNIV_PAGE_SIZE;
+ /* Read the first page of the datadir tablespace, if found. */
+ if (def.success) {
+ fil_read_first_page(
+ def.file, FALSE, &def.flags, &def.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &space_arch_log_no, &space_arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &def.lsn, &def.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_def_flags = def.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (def.id == id && mod_def_flags == mod_flags) {
+ valid_tablespaces_found++;
+ def.valid = TRUE;
+ } else {
+ /* Do not use this tablespace. */
+ fil_report_bad_tablespace(
+ def.filepath, def.id,
+ def.flags, id, flags);
+ }
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Flush lsn in the tablespace file %lu"
- " to be imported\n"
- "InnoDB: is " LSN_PF ", which exceeds current"
- " system lsn " LSN_PF ".\n"
- "InnoDB: We reset the lsn's in the file ",
- (ulong) space_id,
- flush_lsn, current_lsn);
- ut_print_filename(stderr, filepath);
- fputs(".\n", stderr);
-
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- /* Loop through all the pages in the tablespace and reset the lsn and
- the page checksum if necessary */
-
- file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
+ /* Read the first page of the remote tablespace */
+ if (remote.success) {
+ fil_read_first_page(
+ remote.file, FALSE, &remote.flags, &remote.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &remote.arch_log_no, &remote.arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &remote.lsn, &remote.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_remote_flags = remote.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (remote.id == id && mod_remote_flags == mod_flags) {
+ valid_tablespaces_found++;
+ remote.valid = TRUE;
+ } else {
+ /* Do not use this linked tablespace. */
+ fil_report_bad_tablespace(
+ remote.filepath, remote.id,
+ remote.flags, id, flags);
+ link_file_is_bad = true;
+ }
+ }
- for (offset = 0; offset < file_size;
- offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, offset,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
+	/* Read the first page of the dictionary tablespace, if found. */
+ if (dict.success) {
+ fil_read_first_page(
+ dict.file, FALSE, &dict.flags, &dict.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &dict.arch_log_no, &dict.arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &dict.lsn, &dict.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_dict_flags = dict.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (dict.id == id && mod_dict_flags == mod_flags) {
+ valid_tablespaces_found++;
+ dict.valid = TRUE;
+ } else {
+ /* Do not use this tablespace. */
+ fil_report_bad_tablespace(
+ dict.filepath, dict.id,
+ dict.flags, id, flags);
+ }
+ }
- goto func_exit;
+ /* Make sense of these three possible locations.
+ First, bail out if no tablespace files were found. */
+ if (valid_tablespaces_found == 0) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not find a valid tablespace file for '%s'. "
+ "See " REFMAN "innodb-troubleshooting-datadict.html "
+ "for how to resolve the issue.",
+ tablename);
+
+ err = DB_CORRUPTION;
+
+ goto cleanup_and_exit;
+ }
+
+ /* Do not open any tablespaces if more than one tablespace with
+ the correct space ID and flags were found. */
+ if (tablespaces_found > 1) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "A tablespace for %s has been found in "
+ "multiple places;", tablename);
+ if (def.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Default location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ def.filepath, def.lsn,
+ (ulong) def.id, (ulong) def.flags);
+ }
+ if (remote.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Remote location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ remote.filepath, remote.lsn,
+ (ulong) remote.id, (ulong) remote.flags);
}
- if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
- /* We have to reset the lsn */
-
- if (zip_size) {
- memcpy(page_zip.data, page, zip_size);
- buf_flush_init_for_writing(
- page, &page_zip, current_lsn);
- success = os_file_write(
- filepath, file, page_zip.data,
- offset, zip_size);
+ if (dict.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Dictionary location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ dict.filepath, dict.lsn,
+ (ulong) dict.id, (ulong) dict.flags);
+ }
+
+ /* Force-recovery will allow some tablespaces to be
+ skipped by REDO if there was more than one file found.
+ Unlike during the REDO phase of recovery, we now know
+ if the tablespace is valid according to the dictionary,
+ which was not available then. So if we did not force
+ recovery and there is only one good tablespace, ignore
+ any bad tablespaces. */
+ if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Will not open the tablespace for '%s'",
+ tablename);
+
+ if (def.success != def.valid
+ || dict.success != dict.valid
+ || remote.success != remote.valid) {
+ err = DB_CORRUPTION;
} else {
- buf_flush_init_for_writing(
- page, NULL, current_lsn);
- success = os_file_write(
- filepath, file, page,
- offset, UNIV_PAGE_SIZE);
+ err = DB_ERROR;
}
+ goto cleanup_and_exit;
+ }
- if (!success) {
+ /* There is only one valid tablespace found and we did
+ not use srv_force_recovery during REDO. Use this one
+ tablespace and clean up invalid tablespace pointers */
+ if (def.success && !def.valid) {
+ def.success = false;
+ os_file_close(def.file);
+ tablespaces_found--;
+ }
+ if (dict.success && !dict.valid) {
+ dict.success = false;
+ os_file_close(dict.file);
+ /* Leave dict.filepath so that SYS_DATAFILES
+ can be corrected below. */
+ tablespaces_found--;
+ }
+ if (remote.success && !remote.valid) {
+ remote.success = false;
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ remote.filepath = NULL;
+ tablespaces_found--;
+ }
+ }
- goto func_exit;
+ /* At this point, there should be only one filepath. */
+ ut_a(tablespaces_found == 1);
+ ut_a(valid_tablespaces_found == 1);
+
+ /* Only fix the dictionary at startup when there is only one thread.
+ Calls to dict_load_table() can be done while holding other latches. */
+ if (!fix_dict) {
+ goto skip_validate;
+ }
+
+ /* We may need to change what is stored in SYS_DATAFILES or
+ SYS_TABLESPACES or adjust the link file.
+ Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
+ not prevent opening and using the single_table_tablespace either
+ this time or the next, we do not check the return code or fail
+ to open the tablespace. But dict_update_filepath() will issue a
+ warning to the log. */
+ if (dict.filepath) {
+ if (remote.success) {
+ dict_update_filepath(id, remote.filepath);
+ } else if (def.success) {
+ dict_update_filepath(id, def.filepath);
+ if (link_file_is_bad) {
+ fil_delete_link_file(tablename);
}
+ } else if (!link_file_found || link_file_is_bad) {
+ ut_ad(dict.success);
+ /* Fix the link file if we got our filepath
+ from the dictionary but a link file did not
+ exist or it did not point to a valid file. */
+ fil_delete_link_file(tablename);
+ fil_create_link_file(tablename, dict.filepath);
}
- }
- success = os_file_flush(file);
- if (!success) {
+ } else if (remote.success && dict_filepath_same_as_default) {
+ dict_update_filepath(id, remote.filepath);
- goto func_exit;
+ } else if (remote.success && path_in == NULL) {
+ /* SYS_DATAFILES record for this space ID was not found. */
+ dict_insert_tablespace_and_filepath(
+ id, tablename, remote.filepath, flags);
}
- /* We now update the flush_lsn stamp at the start of the file */
- success = os_file_read(file, page, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
+skip_validate:
+ if (err != DB_SUCCESS) {
+ ; // Don't load the tablespace into the cache
+ } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
+ err = DB_ERROR;
+ } else {
+ /* We do not measure the size of the file, that is why
+ we pass the 0 below */
- goto func_exit;
+ if (!fil_node_create(remote.success ? remote.filepath :
+ dict.success ? dict.filepath :
+ def.filepath, 0, id, FALSE)) {
+ err = DB_ERROR;
+ }
}
- mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-
- success = os_file_write(filepath, file, page, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
+cleanup_and_exit:
+ if (remote.success) {
+ os_file_close(remote.file);
}
- success = os_file_flush(file);
-func_exit:
- os_file_close(file);
- ut_free(buf2);
- mem_free(filepath);
-
- return(success);
-}
-
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: tablespace flags */
- const char* tablename) /*!< in: table name in the
- databasename/tablename format */
-{
- os_file_t file;
- char* filepath;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint space_flags;
-
- filepath = fil_make_ibd_name(tablename, FALSE);
-
- fsp_flags_validate(flags);
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n"
- "InnoDB: Have you moved InnoDB .ibd files around"
- " without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: It is also possible that this is"
- " a temporary table #sql...,\n"
- "InnoDB: and MySQL removed the .ibd file for this.\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN
- "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
-
- mem_free(filepath);
-
- return(FALSE);
+ if (remote.filepath) {
+ mem_free(remote.filepath);
}
-
- if (!check_space_id) {
- space_id = id;
-
- goto skip_check;
+ if (dict.success) {
+ os_file_close(dict.file);
}
-
- /* Read the first page of the tablespace */
-
- buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
-
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
-
- /* We have to read the tablespace id and flags from the file. */
-
- space_id = fsp_header_get_space_id(page);
- space_flags = fsp_header_get_flags(page);
-
- ut_free(buf2);
-
- if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: tablespace id and flags in file ",
- stderr);
- ut_print_filename(stderr, filepath);
- fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
- "InnoDB: data dictionary they are %lu and %lu.\n"
- "InnoDB: Have you moved InnoDB .ibd files"
- " around without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n",
- (ulong) space_id, (ulong) space_flags,
- (ulong) id, (ulong) flags);
-
- success = FALSE;
-
- goto func_exit;
+ if (dict.filepath) {
+ mem_free(dict.filepath);
}
-
-skip_check:
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
- goto func_exit;
+ if (def.success) {
+ os_file_close(def.file);
}
+ mem_free(def.filepath);
- /* We do not measure the size of the file, that is why we pass the 0
- below */
-
- fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
- os_file_close(file);
- mem_free(filepath);
-
- return(success);
+ return(err);
}
#endif /* !UNIV_HOTBACKUP */
@@ -3282,13 +3897,64 @@ fil_make_ibbackup_old_name(
memcpy(path, name, len);
memcpy(path + len, suffix, (sizeof suffix) - 1);
- ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
+ ut_sprintf_timestamp_without_extra_chars(
+ path + len + ((sizeof suffix) - 1));
return(path);
}
#endif /* UNIV_HOTBACKUP */
/********************************************************************//**
Opens an .ibd file and adds the associated single-table tablespace to the
+InnoDB fil0fil.cc data structures.
+Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
+static
+void
+fil_validate_single_table_tablespace(
+/*=================================*/
+ const char* tablename, /*!< in: database/tablename */
+ fsp_open_info* fsp) /*!< in/out: tablespace info */
+{
+ fil_read_first_page(
+ fsp->file, FALSE, &fsp->flags, &fsp->id,
+#ifdef UNIV_LOG_ARCHIVE
+ &fsp->arch_log_no, &fsp->arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &fsp->lsn, &fsp->lsn);
+
+ if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
+ fprintf(stderr,
+ " InnoDB: Error: Tablespace is not sensible;"
+ " Table: %s Space ID: %lu Filepath: %s\n",
+ tablename, (ulong) fsp->id, fsp->filepath);
+ fsp->success = FALSE;
+ return;
+ }
+
+ mutex_enter(&fil_system->mutex);
+ fil_space_t* space = fil_space_get_by_id(fsp->id);
+ mutex_exit(&fil_system->mutex);
+ if (space != NULL) {
+ char* prev_filepath = fil_space_get_first_path(fsp->id);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Attempted to open a previously opened tablespace. "
+ "Previous tablespace %s uses space ID: %lu at "
+ "filepath: %s. Cannot open tablespace %s which uses "
+ "space ID: %lu at filepath: %s",
+ space->name, (ulong) space->id, prev_filepath,
+ tablename, (ulong) fsp->id, fsp->filepath);
+
+ mem_free(prev_filepath);
+ fsp->success = FALSE;
+ return;
+ }
+
+ fsp->success = TRUE;
+}
+
+
+/********************************************************************//**
+Opens an .ibd file and adds the associated single-table tablespace to the
InnoDB fil0fil.cc data structures. */
static
void
@@ -3296,34 +3962,49 @@ fil_load_single_table_tablespace(
/*=============================*/
const char* dbname, /*!< in: database name */
const char* filename) /*!< in: file name (not a path),
- including the .ibd extension */
+ including the .ibd or .isl extension */
{
- os_file_t file;
- char* filepath;
char* tablename;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint flags;
+ ulint tablename_len;
+ ulint dbname_len = strlen(dbname);
+ ulint filename_len = strlen(filename);
+ fsp_open_info def;
+ fsp_open_info remote;
os_offset_t size;
#ifdef UNIV_HOTBACKUP
fil_space_t* space;
#endif
- filepath = static_cast<char*>(
- mem_alloc(
- strlen(dbname)
- + strlen(filename)
- + strlen(fil_path_to_mysql_datadir) + 3));
- sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
- filename);
- srv_normalize_path_for_win(filepath);
+ memset(&def, 0, sizeof(def));
+ memset(&remote, 0, sizeof(remote));
+ /* The caller assured that the extension is ".ibd" or ".isl". */
+ ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
+ || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
+
+ /* Build up the tablename in the standard form database/table. */
tablename = static_cast<char*>(
- mem_alloc(strlen(dbname) + strlen(filename) + 2));
+ mem_alloc(dbname_len + filename_len + 2));
sprintf(tablename, "%s/%s", dbname, filename);
- tablename[strlen(tablename) - strlen(".ibd")] = 0;
+ tablename_len = strlen(tablename) - strlen(".ibd");
+ tablename[tablename_len] = '\0';
+
+ /* There may be both .ibd and .isl file in the directory.
+ And it is possible that the .isl file refers to a different
+ .ibd file. If so, we open and compare them the first time
+ one of them is sent to this function. So if this table has
+ already been loaded, there is nothing to do.*/
+ mutex_enter(&fil_system->mutex);
+ if (fil_space_get_by_name(tablename)) {
+ mem_free(tablename);
+ mutex_exit(&fil_system->mutex);
+ return;
+ }
+ mutex_exit(&fil_system->mutex);
+
+ /* Build up the filepath of the .ibd tablespace in the datadir.
+ This must be freed independent of def.success. */
+ def.filepath = fil_make_ibd_name(tablename, false);
#ifdef __WIN__
# ifndef UNIV_HOTBACKUP
@@ -3333,31 +4014,56 @@ fil_load_single_table_tablespace(
file path to lower case, so that we are consistent with InnoDB's
internal data dictionary. */
- dict_casedn_str(filepath);
+ dict_casedn_str(def.filepath);
# endif /* !UNIV_HOTBACKUP */
#endif
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ /* Check for a link file which locates a remote tablespace. */
+ remote.success = fil_open_linked_file(
+ tablename, &remote.filepath, &remote.file);
+
+ /* Read the first page of the remote tablespace */
+ if (remote.success) {
+ fil_validate_single_table_tablespace(tablename, &remote);
+ if (!remote.success) {
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ }
+ }
+
+
+ /* Try to open the tablespace in the datadir. */
+ def.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, def.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &def.success);
+
+	/* Read the first page of the default datadir tablespace */
+ if (def.success) {
+ fil_validate_single_table_tablespace(tablename, &def);
+ if (!def.success) {
+ os_file_close(def.file);
+ }
+ }
+
+ if (!def.success && !remote.success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ fprintf(stderr,
+ "InnoDB: Error: could not open single-table"
+ " tablespace file %s\n", def.filepath);
+no_good_file:
fprintf(stderr,
- "InnoDB: Error: could not open single-table tablespace"
- " file\n"
- "InnoDB: %s!\n"
"InnoDB: We do not continue the crash recovery,"
" because the table may become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
+ "InnoDB: corrupt if we cannot apply the log"
+ " records in the InnoDB log to it.\n"
"InnoDB: To fix the problem and start mysqld:\n"
"InnoDB: 1) If there is a permission problem"
" in the file and mysqld cannot\n"
"InnoDB: open the file, you should"
" modify the permissions.\n"
- "InnoDB: 2) If the table is not needed, or you can"
- " restore it from a backup,\n"
+ "InnoDB: 2) If the table is not needed, or you"
+ " can restore it from a backup,\n"
"InnoDB: then you can remove the .ibd file,"
" and InnoDB will do a normal\n"
"InnoDB: crash recovery and ignore that table.\n"
@@ -3366,123 +4072,84 @@ fil_load_single_table_tablespace(
"InnoDB: the .ibd file, you can set"
" innodb_force_recovery > 0 in my.cnf\n"
"InnoDB: and force InnoDB to continue crash"
- " recovery here.\n", filepath);
-
+ " recovery here.\n");
+will_not_choose:
mem_free(tablename);
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
+ if (remote.success) {
+ mem_free(remote.filepath);
}
-
- exit(1);
- }
-
- size = os_file_get_size(file);
-
- if (UNIV_UNLIKELY(size == (os_offset_t) -1)) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not measure the size"
- " of single-table tablespace file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue crash recovery,"
- " because the table will become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: access the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed,"
- " or you can restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the disk is broken,"
- " and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue"
- " crash recovery here.\n", filepath);
-
- os_file_close(file);
- mem_free(tablename);
- mem_free(filepath);
+ mem_free(def.filepath);
if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "innodb_force_recovery was set to %lu. "
+ "Continuing crash recovery even though we "
+ "cannot access the .ibd file of this table.",
srv_force_recovery);
return;
}
+ /* If debug code, cause a core dump and call stack. For
+ release builds just exit and rely on the messages above. */
+ ut_ad(0);
exit(1);
}
- /* TODO: What to do in other cases where we cannot access an .ibd
- file during a crash recovery? */
+ if (def.success && remote.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespaces for %s have been found in two places;\n"
+ "Location 1: SpaceID: %lu LSN: %lu File: %s\n"
+ "Location 2: SpaceID: %lu LSN: %lu File: %s\n"
+ "You must delete one of them.",
+ tablename, (ulong) def.id, (ulong) def.lsn,
+ def.filepath, (ulong) remote.id, (ulong) remote.lsn,
+ remote.filepath);
- /* Every .ibd file is created >= 4 pages in size. Smaller files
- cannot be ok. */
+ def.success = FALSE;
+ os_file_close(def.file);
+ os_file_close(remote.file);
+ goto will_not_choose;
+ }
-#ifndef UNIV_HOTBACKUP
- if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table"
- " tablespace file %s\n"
- "InnoDB: is only " UINT64PF
- ", should be at least %lu!\n",
- filepath,
- size, (ulong) (4 * UNIV_PAGE_SIZE));
- os_file_close(file);
- mem_free(tablename);
- mem_free(filepath);
+ /* At this point, only one tablespace is open */
+ ut_a(def.success == !remote.success);
- return;
- }
-#endif
- /* Read the first page of the tablespace if the size is big enough */
+ fsp_open_info* fsp = def.success ? &def : &remote;
- buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ /* Get and test the file size. */
+ size = os_file_get_size(fsp->file);
- if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
+ if (size == (os_offset_t) -1) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
- /* We have to read the tablespace id from the file */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "could not measure the size of single-table "
+ "tablespace file %s", fsp->filepath);
- space_id = fsp_header_get_space_id(page);
- flags = fsp_header_get_flags(page);
- } else {
- space_id = ULINT_UNDEFINED;
- flags = 0;
+ os_file_close(fsp->file);
+ goto no_good_file;
}
+ /* Every .ibd file is created >= 4 pages in size. Smaller files
+ cannot be ok. */
+ ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
+ if (size < minimum_size) {
#ifndef UNIV_HOTBACKUP
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu in file %s"
- " is not sensible\n",
- (ulong) space_id,
- filepath);
- goto func_exit;
- }
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The size of single-table tablespace file %s "
+ "is only " UINT64PF ", should be at least %lu!",
+ fsp->filepath, size, minimum_size);
+ os_file_close(fsp->file);
+ goto no_good_file;
#else
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
+ fsp->id = ULINT_UNDEFINED;
+ fsp->flags = 0;
+#endif /* !UNIV_HOTBACKUP */
+ }
+
+#ifdef UNIV_HOTBACKUP
+ if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
char* new_path;
fprintf(stderr,
@@ -3494,18 +4161,19 @@ fil_load_single_table_tablespace(
" is not sensible.\n"
"InnoDB: This can happen in an ibbackup run,"
" and is not dangerous.\n",
- filepath, space_id, filepath, size);
- os_file_close(file);
+ fsp->filepath, fsp->id, fsp->filepath, size);
+ os_file_close(fsp->file);
- new_path = fil_make_ibbackup_old_name(filepath);
- ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
+ new_path = fil_make_ibbackup_old_name(fsp->filepath);
+
+ bool success = os_file_rename(
+			innodb_file_data_key, fsp->filepath, new_path);
+
+ ut_a(success);
- ut_free(buf2);
- mem_free(tablename);
- mem_free(filepath);
mem_free(new_path);
- return;
+ goto func_exit_after_close;
}
/* A backup may contain the same space several times, if the space got
@@ -3517,7 +4185,7 @@ fil_load_single_table_tablespace(
mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_id(space_id);
+ space = fil_space_get_by_id(fsp->id);
if (space) {
char* new_path;
@@ -3529,52 +4197,64 @@ fil_load_single_table_tablespace(
"InnoDB: was scanned earlier. This can happen"
" if you have renamed tables\n"
"InnoDB: during an ibbackup run.\n",
- filepath, space_id, filepath,
+ fsp->filepath, fsp->id, fsp->filepath,
space->name);
- os_file_close(file);
+ os_file_close(fsp->file);
- new_path = fil_make_ibbackup_old_name(filepath);
+ new_path = fil_make_ibbackup_old_name(fsp->filepath);
mutex_exit(&fil_system->mutex);
- ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
+ bool success = os_file_rename(
+ innodb_file_data_key, fsp->filepath, new_path);
+
+ ut_a(success);
- ut_free(buf2);
- mem_free(tablename);
- mem_free(filepath);
mem_free(new_path);
- return;
+ goto func_exit_after_close;
}
mutex_exit(&fil_system->mutex);
-#endif
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
+#endif /* UNIV_HOTBACKUP */
+ ibool file_space_create_success = fil_space_create(
+ tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
+ if (!file_space_create_success) {
if (srv_force_recovery > 0) {
fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though the tablespace creation"
- " of this table failed.\n",
+ "InnoDB: innodb_force_recovery was set"
+ " to %lu. Continuing crash recovery\n"
+ "InnoDB: even though the tablespace"
+ " creation of this table failed.\n",
srv_force_recovery);
goto func_exit;
}
- exit(1);
+ /* Exit here with a core dump, stack, etc. */
+ ut_a(file_space_create_success);
}
/* We do not use the size information we have about the file, because
the rounding formula for extents and pages is somewhat complex; we
let fil_node_open() do that task. */
- fil_node_create(filepath, 0, space_id, FALSE);
+ if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
+ ut_error;
+ }
+
func_exit:
- os_file_close(file);
- ut_free(buf2);
+ os_file_close(fsp->file);
+
+#ifdef UNIV_HOTBACKUP
+func_exit_after_close:
+#else
+ ut_ad(!mutex_own(&fil_system->mutex));
+#endif
mem_free(tablename);
- mem_free(filepath);
+ if (remote.success) {
+ mem_free(remote.filepath);
+ }
+ mem_free(def.filepath);
}
/***********************************************************************//**
@@ -3587,29 +4267,25 @@ static
int
fil_file_readdir_next_file(
/*=======================*/
- ulint* err, /*!< out: this is set to DB_ERROR if an error
+ dberr_t* err, /*!< out: this is set to DB_ERROR if an error
was encountered, otherwise not changed */
const char* dirname,/*!< in: directory name or path */
os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
+ os_file_stat_t* info) /*!< in/out: buffer where the
+ info is returned */
{
- ulint i;
- int ret;
-
- for (i = 0; i < 100; i++) {
- ret = os_file_readdir_next_file(dirname, dir, info);
+ for (ulint i = 0; i < 100; i++) {
+ int ret = os_file_readdir_next_file(dirname, dir, info);
if (ret != -1) {
return(ret);
}
- fprintf(stderr,
- "InnoDB: Error: os_file_readdir_next_file()"
- " returned -1 in\n"
- "InnoDB: directory %s\n"
- "InnoDB: Crash recovery may have failed"
- " for some .ibd files!\n", dirname);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_readdir_next_file() returned -1 in "
+ "directory %s, crash recovery may have failed "
+ "for some .ibd files!", dirname);
*err = DB_ERROR;
}
@@ -3626,7 +4302,7 @@ in the doublewrite buffer, also to know where to apply log records where the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_load_single_table_tablespaces(void)
/*===================================*/
{
@@ -3637,7 +4313,7 @@ fil_load_single_table_tablespaces(void)
os_file_dir_t dbdir;
os_file_stat_t dbinfo;
os_file_stat_t fileinfo;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
/* The datadir of MySQL is always the default directory of mysqld */
@@ -3686,7 +4362,6 @@ fil_load_single_table_tablespaces(void)
dbdir = os_file_opendir(dbpath, FALSE);
if (dbdir != NULL) {
- /* printf("Opened dir %s\n", dbinfo.name); */
/* We found a database directory; loop through it,
looking for possible .ibd files in it */
@@ -3694,8 +4369,6 @@ fil_load_single_table_tablespaces(void)
ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
&fileinfo);
while (ret == 0) {
- /* printf(
- " Looking at file %s\n", fileinfo.name); */
if (fileinfo.type == OS_FILE_TYPE_DIR) {
@@ -3704,11 +4377,14 @@ fil_load_single_table_tablespaces(void)
/* We found a symlink or a file */
if (strlen(fileinfo.name) > 4
- && 0 == strcmp(fileinfo.name
+ && (0 == strcmp(fileinfo.name
+ + strlen(fileinfo.name) - 4,
+ ".ibd")
+ || 0 == strcmp(fileinfo.name
+ strlen(fileinfo.name) - 4,
- ".ibd")) {
- /* The name ends in .ibd; try opening
- the file */
+ ".isl"))) {
+ /* The name ends in .ibd or .isl;
+ try opening the file */
fil_load_single_table_tablespace(
dbinfo.name, fileinfo.name);
}
@@ -3808,6 +4484,29 @@ fil_tablespace_exists_in_mem(
}
/*******************************************************************//**
+Report that a tablespace for a table was not found. */
+static
+void
+fil_report_missing_tablespace(
+/*===========================*/
+ const char* name, /*!< in: table name */
+ ulint space_id) /*!< in: table's space id */
+{
+ char index_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(index_name, sizeof(index_name), name, TRUE);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table %s in the InnoDB data dictionary has tablespace id %lu, "
+ "but tablespace with that id or name does not exist. Have "
+ "you deleted or moved .ibd files? This may also be a table "
+ "created with CREATE TEMPORARY TABLE whose .ibd and .frm "
+ "files MySQL automatically removed, but the table still "
+ "exists in the InnoDB internal data dictionary.",
+ name, space_id);
+}
+
+/*******************************************************************//**
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache.
@@ -3817,19 +4516,25 @@ ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format */
+ const char* name, /*!< in: table name used in
+ fil_space_create(). Either the
+ standard 'dbname/tablename' format
+ or table->dir_path_of_temp_table */
ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
- ibool print_error_if_does_not_exist)
+ ibool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
+ bool adjust_space, /*!< in: whether to adjust space id
+ when find table space mismatch */
+ mem_heap_t* heap, /*!< in: heap memory */
+ table_id_t table_id) /*!< in: table id */
{
fil_space_t* fnamespace;
fil_space_t* space;
@@ -3858,6 +4563,47 @@ fil_space_for_table_exists_in_mem(
return(TRUE);
}
+ /* Info from "fnamespace" comes from the ibd file itself, it can
+ be different from data obtained from System tables since it is
+ not transactional. If adjust_space is set, and the mismatching
+ space are between a user table and its temp table, we shall
+ adjust the ibd file name according to system table info */
+ if (adjust_space
+ && space != NULL
+ && row_is_mysql_tmp_table_name(space->name)
+ && !row_is_mysql_tmp_table_name(name)) {
+
+ mutex_exit(&fil_system->mutex);
+
+ DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
+ DBUG_SUICIDE(););
+
+ if (fnamespace) {
+ char* tmp_name;
+
+ tmp_name = dict_mem_create_temporary_tablename(
+ heap, name, table_id);
+
+ fil_rename_tablespace(fnamespace->name, fnamespace->id,
+ tmp_name, NULL);
+ }
+
+ DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
+ DBUG_SUICIDE(););
+
+ fil_rename_tablespace(space->name, id, name, NULL);
+
+ DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
+ DBUG_SUICIDE(););
+
+ mutex_enter(&fil_system->mutex);
+ fnamespace = fil_space_get_by_name(name);
+ ut_ad(space == fnamespace);
+ mutex_exit(&fil_system->mutex);
+
+ return(TRUE);
+ }
+
if (!print_error_if_does_not_exist) {
mutex_exit(&fil_system->mutex);
@@ -3867,22 +4613,9 @@ fil_space_for_table_exists_in_mem(
if (space == NULL) {
if (fnamespace == NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has tablespace id %lu,\n"
- "InnoDB: but tablespace with that id"
- " or name does not exist. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n"
- "InnoDB: This may also be a table created with"
- " CREATE TEMPORARY TABLE\n"
- "InnoDB: whose .ibd and .frm files"
- " MySQL automatically removed, but the\n"
- "InnoDB: table still exists in the"
- " InnoDB internal data dictionary.\n",
- (ulong) id);
+ if (print_error_if_does_not_exist) {
+ fil_report_missing_tablespace(name, id);
+ }
} else {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: table ", stderr);
@@ -3941,7 +4674,7 @@ error_exit:
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache.
@return space id, ULINT_UNDEFINED if not found */
-static
+UNIV_INTERN
ulint
fil_get_space_id_for_table(
/*=======================*/
@@ -3996,6 +4729,8 @@ fil_extend_space_to_desired_size(
ulint pages_added;
ibool success;
+ ut_ad(!srv_read_only_mode);
+
retry:
pages_added = 0;
success = TRUE;
@@ -4070,7 +4805,7 @@ retry:
node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL);
-#endif
+#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
} else {
@@ -4143,7 +4878,7 @@ fil_extend_tablespaces_to_stored_len(void)
byte* buf;
ulint actual_size;
ulint size_in_header;
- ulint error;
+ dberr_t error;
ibool success;
buf = mem_alloc(UNIV_PAGE_SIZE);
@@ -4177,7 +4912,7 @@ fil_extend_tablespaces_to_stored_len(void)
"InnoDB: Check that you have free disk space"
" and retry!\n",
space->name, size_in_header, actual_size);
- exit(1);
+ ut_a(success);
}
mutex_enter(&fil_system->mutex);
@@ -4347,12 +5082,21 @@ fil_node_complete_io(
node->n_pending--;
if (type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
system->modification_counter++;
node->modification_counter = system->modification_counter;
- if (!node->space->is_in_unflushed_spaces) {
+ if (fil_buffering_disabled(node->space)) {
+
+ /* We don't need to keep track of unflushed
+ changes as user has explicitly disabled
+ buffering. */
+ ut_ad(!node->space->is_in_unflushed_spaces);
+ node->flush_counter = node->modification_counter;
- node->space->is_in_unflushed_spaces = TRUE;
+ } else if (!node->space->is_in_unflushed_spaces) {
+
+ node->space->is_in_unflushed_spaces = true;
UT_LIST_ADD_FIRST(unflushed_spaces,
system->unflushed_spaces,
node->space);
@@ -4399,7 +5143,7 @@ Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INTERN
-ulint
+dberr_t
fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
@@ -4462,9 +5206,11 @@ fil_io(
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
/* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
+ ut_ad(recv_no_ibuf_operations
+ || type == OS_FILE_WRITE
|| !ibuf_bitmap_page(zip_size, block_offset)
- || sync || is_log);
+ || sync
+ || is_log);
# endif /* UNIV_LOG_DEBUG */
if (sync) {
mode = OS_AIO_SYNC;
@@ -4483,9 +5229,10 @@ fil_io(
#endif /* !UNIV_HOTBACKUP */
if (type == OS_FILE_READ) {
- srv_data_read+= len;
+ srv_stats.data_read.add(len);
} else if (type == OS_FILE_WRITE) {
- srv_data_written+= len;
+ ut_ad(!srv_read_only_mode);
+ srv_stats.data_written.add(len);
}
/* Reserve the fil_system mutex and make sure that we can open at
@@ -4497,48 +5244,43 @@ fil_io(
/* If we are deleting a tablespace we don't allow any read
operations on that. However, we do allow write operations. */
- if (!space || (type == OS_FILE_READ && space->stop_new_ops)) {
+ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) {
mutex_exit(&fil_system->mutex);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to do i/o"
- " to a tablespace which does not exist.\n"
- "InnoDB: i/o type %lu, space id %lu,"
- " page no. %lu, i/o length %lu bytes\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to do i/o to a tablespace which does "
+ "not exist. i/o type %lu, space id %lu, "
+ "page no. %lu, i/o length %lu bytes",
(ulong) type, (ulong) space_id, (ulong) block_offset,
(ulong) len);
return(DB_TABLESPACE_DELETED);
}
- ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
+ ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);
node = UT_LIST_GET_FIRST(space->chain);
for (;;) {
- if (UNIV_UNLIKELY(node == NULL)) {
+ if (node == NULL) {
if (ignore_nonexistent_pages) {
mutex_exit(&fil_system->mutex);
return(DB_ERROR);
}
- /* else */
fil_report_invalid_page_access(
block_offset, space_id, space->name,
byte_offset, len, type);
ut_error;
- }
- if (fil_is_user_tablespace_id(space->id) && node->size == 0) {
+ } else if (fil_is_user_tablespace_id(space->id)
+ && node->size == 0) {
+
/* We do not know the size of a single-table tablespace
before we open the file */
-
break;
- }
-
- if (node->size > block_offset) {
+ } else if (node->size > block_offset) {
/* Found! */
break;
} else {
@@ -4600,6 +5342,7 @@ fil_io(
if (type == OS_FILE_READ) {
ret = os_file_read(node->handle, buf, offset, len);
} else {
+ ut_ad(!srv_read_only_mode);
ret = os_file_write(node->name, node->handle, buf,
offset, len);
}
@@ -4607,7 +5350,7 @@ fil_io(
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
offset, len, node, message);
-#endif
+#endif /* UNIV_HOTBACKUP */
ut_a(ret);
if (mode == OS_AIO_SYNC) {
@@ -4649,24 +5392,24 @@ fil_aio_wait(
if (srv_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, &fil_node,
- &message, &type);
+ ret = os_aio_windows_handle(
+ segment, 0, &fil_node, &message, &type);
#elif defined(LINUX_NATIVE_AIO)
- ret = os_aio_linux_handle(segment, &fil_node,
- &message, &type);
+ ret = os_aio_linux_handle(
+ segment, &fil_node, &message, &type);
#else
ut_error;
ret = 0; /* Eliminate compiler warning */
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
srv_set_io_thread_op_info(segment, "simulated aio handle");
- ret = os_aio_simulated_handle(segment, &fil_node,
- &message, &type);
+ ret = os_aio_simulated_handle(
+ segment, &fil_node, &message, &type);
}
ut_a(ret);
- if (UNIV_UNLIKELY(fil_node == NULL)) {
+ if (fil_node == NULL) {
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
return;
}
@@ -4722,6 +5465,28 @@ fil_flush(
return;
}
+ if (fil_buffering_disabled(space)) {
+
+ /* No need to flush. User has explicitly disabled
+ buffering. */
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
+ ut_ad(space->n_pending_flushes == 0);
+
+#ifdef UNIV_DEBUG
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ ut_ad(node->modification_counter
+ == node->flush_counter);
+ ut_ad(node->n_pending_flushes == 0);
+ }
+#endif /* UNIV_DEBUG */
+
+ mutex_exit(&fil_system->mutex);
+ return;
+ }
+
space->n_pending_flushes++; /*!< prevent dropping of the space while
we are flushing */
node = UT_LIST_GET_FIRST(space->chain);
@@ -4745,7 +5510,7 @@ fil_flush(
goto skip_flush;
}
-#endif
+#endif /* __WIN__ */
retry:
if (node->n_pending_flushes > 0) {
/* We want to avoid calling os_file_flush() on
@@ -4788,7 +5553,7 @@ skip_flush:
if (space->is_in_unflushed_spaces
&& fil_space_is_flushed(space)) {
- space->is_in_unflushed_spaces = FALSE;
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(
unflushed_spaces,
@@ -5025,3 +5790,401 @@ fil_close(void)
fil_system = NULL;
}
+
+/********************************************************************//**
+Initializes a buffer control block when the buf_pool is created. */
+static
+void
+fil_buf_block_init(
+/*===============*/
+ buf_block_t* block, /*!< in: pointer to control block */
+ byte* frame) /*!< in: pointer to buffer frame */
+{
+ UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
+
+ block->frame = frame;
+
+ block->page.io_fix = BUF_IO_NONE;
+ /* There are assertions that check for this. */
+ block->page.buf_fix_count = 1;
+ block->page.state = BUF_BLOCK_READY_FOR_USE;
+
+ page_zip_des_init(&block->page.zip);
+}
+
+struct fil_iterator_t {
+ os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint page_size; /*!< Page size */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+of data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ PageCallback& callback)
+{
+ os_offset_t offset;
+ ulint page_no = 0;
+ ulint space_id = callback.get_space_id();
+ ulint n_bytes = iter.n_io_buffers * iter.page_size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For compressed tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+
+ byte* io_buffer = iter.io_buffer;
+
+ block->frame = io_buffer;
+
+ if (callback.get_zip_size() > 0) {
+ page_zip_des_init(&block->page.zip);
+ page_zip_set_size(&block->page.zip, iter.page_size);
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ ut_d(block->page.zip.m_external = true);
+ ut_ad(iter.page_size == callback.get_zip_size());
+
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ } else {
+ io_buffer = iter.io_buffer;
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = static_cast<ulint>(
+ ut_min(static_cast<os_offset_t>(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % iter.page_size));
+
+ if (!os_file_read(iter.file, io_buffer, offset,
+ (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
+
+ return(DB_IO_ERROR);
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = (ulint) n_bytes / iter.page_size;
+
+ for (ulint i = 0; i < n_pages_read; ++i) {
+
+ buf_block_set_file_page(block, space_id, page_no++);
+
+ dberr_t err;
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+
+ return(err);
+
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+
+ page_off += iter.page_size;
+ block->frame += iter.page_size;
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated
+ && !os_file_write(
+ iter.filepath, iter.file, io_buffer,
+ offset, (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ PageCallback& callback)
+{
+ dberr_t err;
+ os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ {
+ ibool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
+
+ DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
+ {
+ static bool once;
+
+ if (!once || ut_rnd_interval(0, 10) == 5) {
+ once = true;
+ success = FALSE;
+ os_file_close(file);
+ }
+ });
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to import a tablespace, but could not "
+ "open the tablespace file %s", filepath);
+
+ mem_free(filepath);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* The block we will use for every physical page */
+ buf_block_t block;
+
+ memset(&block, 0x0, sizeof(block));
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+page is to ensure alignment. */
+
+ void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ fil_buf_block_init(&block, page);
+
+ /* Read the first page and determine the page and zip size. */
+
+ if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
+
+ err = DB_IO_ERROR;
+
+ } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
+ fil_iterator_t iter;
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+ iter.page_size = callback.get_page_size();
+
+ /* Compressed pages can't be optimised for block IO for now.
+ We do the IMPORT page by page. */
+
+ if (callback.get_zip_size() > 0) {
+ iter.n_io_buffers = 1;
+ ut_a(iter.page_size == callback.get_zip_size());
+ }
+
+ /** Add an extra page for compressed page scratch area. */
+
+ void* io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ err = fil_iterate(iter, &block, callback);
+
+ mem_free(io_buffer);
+ }
+
+ if (err == DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
+
+ if (!os_file_flush(file)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
+ err = DB_IO_ERROR;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
+ }
+ }
+
+ os_file_close(file);
+
+ mem_free(page_ptr);
+ mem_free(filepath);
+
+ return(err);
+}
+
+/**
+Set the tablespace compressed table size.
+@return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+dberr_t
+PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
+{
+ m_zip_size = fsp_header_get_zip_size(page);
+
+ if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
+ return(DB_CORRUPTION);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+ const char* ibd_name) /*!< in: filepath of the ibd
+ tablespace */
+{
+ /* Force a delete of any stale .ibd files that are lying around. */
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
+
+ os_file_delete_if_exists(ibd_name);
+
+ char* cfg_name = fil_make_cfg_name(ibd_name);
+
+ os_file_delete_if_exists(cfg_name);
+
+ mem_free(cfg_name);
+}
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+ space_name_list_t& space_name_list)
+ /*!< in/out: List to append to */
+{
+ fil_space_t* space;
+ dberr_t err = DB_SUCCESS;
+
+ mutex_enter(&fil_system->mutex);
+
+ for (space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space != NULL;
+ space = UT_LIST_GET_NEXT(space_list, space)) {
+
+ if (space->purpose == FIL_TABLESPACE) {
+ ulint len;
+ char* name;
+
+ len = strlen(space->name);
+ name = new(std::nothrow) char[len + 1];
+
+ if (name == 0) {
+ /* Caller to free elements allocated so far. */
+ err = DB_OUT_OF_MEMORY;
+ break;
+ }
+
+ memcpy(name, space->name, len);
+ name[len] = 0;
+
+ space_name_list.push_back(name);
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return(err);
+}
+
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name) /*!< in: temp table name used while
+ swapping */
+{
+ mtr_t mtr;
+ mtr_start(&mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+ 0, 0, old_name, tmp_name, &mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+ 0, 0, new_name, old_name, &mtr);
+ mtr_commit(&mtr);
+}
+
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 398dd24afed..dc843a89fb9 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -93,15 +93,13 @@ fseg_n_reserved_pages_low(
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
-static
+static __attribute__((nonnull))
void
fseg_mark_page_used(
/*================*/
fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
ulint page, /*!< in: page offset */
+ xdes_t* descr, /*!< in: extent descriptor */
mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Returns the first extent descriptor for a segment. We think of the extent
@@ -214,30 +212,18 @@ Gets a descriptor bit of a page.
@return TRUE if free */
UNIV_INLINE
ibool
-xdes_get_bit(
-/*=========*/
+xdes_mtr_get_bit(
+/*=============*/
const xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
- ulint index;
- ulint byte_index;
- ulint bit_index;
-
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
- ut_ad(offset < FSP_EXTENT_SIZE);
- index = bit + XDES_BITS_PER_PAGE * offset;
-
- byte_index = index / 8;
- bit_index = index % 8;
-
- return(ut_bit_get_nth(mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr),
- bit_index));
+ return(xdes_get_bit(descr, bit, offset));
}
/**********************************************************************//**
@@ -287,7 +273,8 @@ xdes_find_bit(
xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ibool val, /*!< in: desired bit value */
- ulint hint, /*!< in: hint of which bit position would be desirable */
+ ulint hint, /*!< in: hint of which bit position would
+ be desirable */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint i;
@@ -297,14 +284,14 @@ xdes_find_bit(
ut_ad(hint < FSP_EXTENT_SIZE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint; i < FSP_EXTENT_SIZE; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
}
for (i = 0; i < hint; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
@@ -324,7 +311,8 @@ xdes_find_bit_downward(
xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ibool val, /*!< in: desired bit value */
- ulint hint, /*!< in: hint of which bit position would be desirable */
+ ulint hint, /*!< in: hint of which bit position would
+ be desirable */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint i;
@@ -334,14 +322,14 @@ xdes_find_bit_downward(
ut_ad(hint < FSP_EXTENT_SIZE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint + 1; i > 0; i--) {
- if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i - 1, mtr)) {
return(i - 1);
}
}
for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
@@ -360,13 +348,12 @@ xdes_get_n_used(
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint i;
ulint count = 0;
ut_ad(descr && mtr);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+ for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) {
+ if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
count++;
}
}
@@ -471,76 +458,11 @@ xdes_init(
}
/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
-#ifndef DOXYGEN /* Doxygen gets confused of these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
- + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
- * XDES_SIZE_MAX
-# error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
- * XDES_SIZE_MIN
-# error
-# endif
-#endif /* !DOXYGEN */
-
- ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
- * XDES_SIZE);
- ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
- * XDES_SIZE);
-
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
- } else {
- ut_ad(zip_size > XDES_ARR_OFFSET
- + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
- return(ut_2pow_round(offset, zip_size));
- }
-}
-
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
- / FSP_EXTENT_SIZE);
- } else {
- return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
- }
-}
-
-/********************************************************************//**
Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. If the page offset is equal to the free limit
-of the space, adds new extents from above the free limit to the space free
-list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit.
+descriptor resides is x-locked. This function no longer extends the data
+file.
@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
+exist in the space or if the offset is >= the free limit */
UNIV_INLINE __attribute__((nonnull, warn_unused_result))
xdes_t*
xdes_get_descriptor_with_space_hdr(
@@ -570,19 +492,10 @@ xdes_get_descriptor_with_space_hdr(
zip_size = fsp_flags_get_zip_size(
mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
- /* If offset is >= size or > limit, return NULL */
-
- if ((offset >= size) || (offset > limit)) {
-
+ if ((offset >= size) || (offset >= limit)) {
return(NULL);
}
- /* If offset is == limit, fill free list of the space. */
-
- if (offset == limit) {
- fsp_fill_free_list(FALSE, space, sp_header, mtr);
- }
-
descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
if (descr_page_no == 0) {
@@ -668,7 +581,7 @@ UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
- xdes_t* descr) /*!< in: extent descriptor */
+ const xdes_t* descr) /*!< in: extent descriptor */
{
ut_ad(descr);
@@ -784,7 +697,7 @@ fsp_header_init_fields(
ulint space_id, /*!< in: space id */
ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS) */
{
- fsp_flags_validate(flags);
+ ut_a(fsp_flags_is_valid(flags));
mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
space_id);
@@ -872,11 +785,13 @@ fsp_header_get_space_id(
id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ DBUG_EXECUTE_IF("fsp_header_get_space_id_failure",
+ id = ULINT_UNDEFINED;);
+
if (id != fsp_id) {
- fprintf(stderr,
- "InnoDB: Error: space id in fsp header %lu,"
- " but in the page header %lu\n",
- (ulong) fsp_id, (ulong) id);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Space id in fsp header %lu, but in the page header "
+ "%lu", fsp_id, id);
return(ULINT_UNDEFINED);
}
@@ -1348,7 +1263,7 @@ fsp_alloc_from_free_frag(
ulint frag_n_used;
ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
+ ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr));
xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
/* Update the FRAG_N_USED field */
@@ -1583,7 +1498,9 @@ fsp_free_page(
ut_error;
}
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+ if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE, mtr)) {
+
fprintf(stderr,
"InnoDB: Error: File space extent descriptor"
" of page %lu says it is free\n"
@@ -1728,16 +1645,15 @@ fsp_seg_inode_page_find_free(
ulint zip_size,/*!< in: compressed page size, or 0 */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- fseg_inode_t* inode;
-
for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ fseg_inode_t* inode;
+
inode = fsp_seg_inode_page_get_nth_inode(
page, i, zip_size, mtr);
if (!mach_read_from_8(inode + FSEG_ID)) {
/* This is unused */
-
return(i);
}
@@ -1763,11 +1679,11 @@ fsp_alloc_seg_inode_page(
page_t* page;
ulint space;
ulint zip_size;
- ulint i;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
space = page_get_space_id(page_align(space_header));
+
zip_size = fsp_flags_get_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
@@ -1788,16 +1704,18 @@ fsp_alloc_seg_inode_page(
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
MLOG_2BYTES, mtr);
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
- inode = fsp_seg_inode_page_get_nth_inode(page, i,
- zip_size, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(
+ page, i, zip_size, mtr);
mlog_write_ull(inode + FSEG_ID, 0, mtr);
}
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
+ flst_add_last(
+ space_header + FSP_SEG_INODES_FREE,
+ page + FSEG_INODE_PAGE_NODE, mtr);
+
return(TRUE);
}
@@ -2486,8 +2404,8 @@ fseg_alloc_free_page_low(
/*-------------------------------------------------------------*/
if ((xdes_get_state(descr, mtr) == XDES_FSEG)
&& mach_read_from_8(descr + XDES_ID) == seg_id
- && (xdes_get_bit(descr, XDES_FREE_BIT,
- hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
+ && (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
take_hinted_page:
/* 1. We can take the hinted page
=================================*/
@@ -2652,10 +2570,12 @@ got_hinted_page:
ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
== ret_descr);
- ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
- ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
- fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
+ ut_ad(xdes_mtr_get_bit(
+ ret_descr, XDES_FREE_BIT,
+ ret_page % FSP_EXTENT_SIZE, mtr));
+
+ fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr);
}
return(fsp_page_create(
@@ -3053,27 +2973,21 @@ fsp_get_available_space_in_free_extents(
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
-static
+static __attribute__((nonnull))
void
fseg_mark_page_used(
/*================*/
fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
ulint page, /*!< in: page offset */
+ xdes_t* descr, /*!< in: extent descriptor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- xdes_t* descr;
ulint not_full_n_used;
- ut_ad(seg_inode && mtr);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
== mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
@@ -3086,8 +3000,9 @@ fseg_mark_page_used(
descr + XDES_FLST_NODE, mtr);
}
- ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
- == TRUE);
+ ut_ad(xdes_mtr_get_bit(
+ descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
+
/* We mark the page as used */
xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
@@ -3142,8 +3057,8 @@ fseg_free_page_low(
descr = xdes_get_descriptor(space, zip_size, page, mtr);
- ut_a(descr);
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+ if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE, mtr)) {
fputs("InnoDB: Dump of the tablespace extent descriptor: ",
stderr);
ut_print_buf(stderr, descr, 40);
@@ -3278,6 +3193,49 @@ fseg_free_page(
}
/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page) /*!< in: page offset */
+{
+ mtr_t mtr;
+ ibool is_free;
+ ulint flags;
+ rw_lock_t* latch;
+ xdes_t* descr;
+ ulint zip_size;
+ fseg_inode_t* seg_inode;
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_tf_get_zip_size(flags);
+
+ mtr_start(&mtr);
+ mtr_x_lock(latch, &mtr);
+
+ seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr);
+
+ ut_a(seg_inode);
+ ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+
+ descr = xdes_get_descriptor(space, zip_size, page, &mtr);
+ ut_a(descr);
+
+ is_free = xdes_mtr_get_bit(
+ descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
+
+ mtr_commit(&mtr);
+
+ return(is_free);
+}
+
+/**********************************************************************//**
Frees an extent of a segment to the space free list. */
static
void
@@ -3308,7 +3266,7 @@ fseg_free_extent(
first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+ if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
/* Drop search system page hash index if the page is
found in the pool and is hashed */
@@ -3388,9 +3346,9 @@ fseg_free_step(
/* Check that the header resides on a page which has not been
freed yet */
- ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT,
- header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+ ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+
inode = fseg_inode_try_get(header, space, zip_size, mtr);
if (UNIV_UNLIKELY(inode == NULL)) {
diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc
index c01c43a021f..972f5acf461 100644
--- a/storage/innobase/fts/fts0ast.cc
+++ b/storage/innobase/fts/fts0ast.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -98,9 +98,21 @@ fts_ast_create_node_text(
void* arg, /*!< in: ast state instance */
const char* ptr) /*!< in: ast text string */
{
+ ulint len = strlen(ptr);
+ fts_ast_node_t* node = NULL;
+
+ ut_ad(len >= 2);
+
+ if (len == 2) {
+ ut_ad(ptr[0] == '\"');
+ ut_ad(ptr[1] == '\"');
+ return(NULL);
+ }
+
+ node = fts_ast_node_create();
+
/*!< We ignore the actual quotes "" */
- ulint len = strlen(ptr) - 2;
- fts_ast_node_t* node = fts_ast_node_create();
+ len -= 2;
node->type = FTS_AST_TEXT;
node->text.ptr = static_cast<byte*>(ut_malloc(len + 1));
@@ -381,34 +393,100 @@ fts_ast_node_print(
}
/******************************************************************//**
-Traverse the AST - in-order traversal.
+Traverse the AST - in-order traversal, except for the FTS_IGNORE
+nodes, which will be ignored in the first pass of each level, and
+visited in a second pass after all other nodes in the same level are visited.
@return DB_SUCCESS if all went well */
UNIV_INTERN
-ulint
+dberr_t
fts_ast_visit(
/*==========*/
fts_ast_oper_t oper, /*!< in: current operator */
fts_ast_node_t* node, /*!< in: current root node */
fts_ast_callback visitor, /*!< in: callback function */
- void* arg) /*!< in: arg for callback */
+ void* arg, /*!< in: arg for callback */
+ bool* has_ignore) /*!< out: true, if the operator
+ was ignored during processing,
+ currently we only ignore
+ FTS_IGNORE operator */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
+ fts_ast_node_t* oper_node = NULL;
+ fts_ast_node_t* start_node;
+ bool revisit = false;
+ bool will_be_ignored = false;
+
+ start_node = node->list.head;
ut_a(node->type == FTS_AST_LIST
|| node->type == FTS_AST_SUBEXP_LIST);
+ /* In the first pass of the tree, at the leaf level of the
+ tree, FTS_IGNORE operation will be ignored. It will be
+ repeated at the level above the leaf level */
for (node = node->list.head;
- node && error == DB_SUCCESS;
+ node && (error == DB_SUCCESS);
node = node->next) {
if (node->type == FTS_AST_LIST) {
- error = fts_ast_visit(oper, node, visitor, arg);
+ error = fts_ast_visit(oper, node, visitor,
+ arg, &will_be_ignored);
+
+ /* If will_be_ignored is set to true, then
+ we encountered and ignored a FTS_IGNORE operator,
+ and a second pass is needed to process the FTS_IGNORE
+ operator */
+ if (will_be_ignored) {
+ revisit = true;
+ }
} else if (node->type == FTS_AST_SUBEXP_LIST) {
error = fts_ast_visit_sub_exp(node, visitor, arg);
} else if (node->type == FTS_AST_OPER) {
oper = node->oper;
+ oper_node = node;
} else {
- visitor(oper, node, arg);
+ if (node->visited) {
+ continue;
+ }
+
+ ut_a(oper == FTS_NONE || !oper_node
+ || oper_node->oper == oper);
+
+ if (oper == FTS_IGNORE) {
+ *has_ignore = true;
+ /* Change the operator to FTS_IGNORE_SKIP,
+ so that it is processed in the second pass */
+ oper_node->oper = FTS_IGNORE_SKIP;
+ continue;
+ }
+
+ if (oper == FTS_IGNORE_SKIP) {
+ /* This must be the second pass, now we process
+ the FTS_IGNORE operator */
+ visitor(FTS_IGNORE, node, arg);
+ } else {
+ visitor(oper, node, arg);
+ }
+
+ node->visited = true;
+ }
+ }
+
+ /* Second pass to process the skipped FTS_IGNORE operation.
+ It is only performed at the level above leaf level */
+ if (revisit) {
+ for (node = start_node;
+ node && error == DB_SUCCESS;
+ node = node->next) {
+
+ if (node->type == FTS_AST_LIST) {
+ /* In this pass, it will process all those
+ operators ignored in the first pass, and those
+ whose operators are set to FTS_IGNORE_SKIP */
+ error = fts_ast_visit(
+ oper, node, visitor, arg,
+ &will_be_ignored);
+ }
}
}
diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc
index b3350010db0..1abd737ec06 100644
--- a/storage/innobase/fts/fts0blex.cc
+++ b/storage/innobase/fts/fts0blex.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner
YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) );
+void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
#define yy_new_buffer fts0b_create_buffer
@@ -347,7 +347,7 @@ typedef int yy_state_type;
static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) );
+static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
/* Done after the current pattern has been matched and before the
* corresponding action - sets up yytext.
@@ -368,10 +368,10 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[18] =
+static yyconst flex_int16_t yy_accept[19] =
{ 0,
- 4, 4, 8, 4, 1, 6, 1, 7, 2, 3,
- 4, 1, 1, 0, 5, 3, 0
+ 4, 4, 8, 4, 1, 6, 1, 7, 7, 2,
+ 3, 4, 1, 1, 0, 5, 3, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -379,17 +379,17 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 1, 1, 1, 6,
- 6, 6, 6, 1, 6, 1, 1, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 1, 1, 6,
- 1, 6, 1, 6, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 7,
+ 7, 7, 7, 1, 7, 1, 1, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 1, 1, 7,
+ 1, 7, 1, 7, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 6, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 7, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -406,35 +406,39 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[8] =
+static yyconst flex_int32_t yy_meta[9] =
{ 0,
- 1, 2, 3, 4, 5, 5, 1
+ 1, 2, 3, 4, 5, 5, 5, 1
} ;
-static yyconst flex_int16_t yy_base[21] =
+static yyconst flex_int16_t yy_base[22] =
{ 0,
- 0, 0, 21, 0, 6, 22, 0, 13, 22, 7,
- 0, 0, 0, 4, 22, 0, 22, 10, 11, 15
+ 0, 0, 22, 0, 7, 23, 0, 14, 23, 23,
+ 7, 0, 0, 0, 5, 23, 0, 23, 11, 12,
+ 16
} ;
-static yyconst flex_int16_t yy_def[21] =
+static yyconst flex_int16_t yy_def[22] =
{ 0,
- 17, 1, 17, 18, 18, 17, 19, 20, 17, 18,
- 18, 5, 19, 20, 17, 10, 0, 17, 17, 17
+ 18, 1, 18, 19, 19, 18, 20, 21, 18, 18,
+ 19, 19, 5, 20, 21, 18, 11, 0, 18, 18,
+ 18
} ;
-static yyconst flex_int16_t yy_nxt[30] =
+static yyconst flex_int16_t yy_nxt[32] =
{ 0,
- 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
- 11, 11, 13, 16, 13, 14, 14, 15, 14, 14,
- 17, 3, 17, 17, 17, 17, 17, 17, 17
+ 4, 5, 6, 7, 8, 9, 10, 11, 13, 16,
+ 14, 12, 12, 14, 17, 14, 15, 15, 16, 15,
+ 15, 18, 3, 18, 18, 18, 18, 18, 18, 18,
+ 18
} ;
-static yyconst flex_int16_t yy_chk[30] =
+static yyconst flex_int16_t yy_chk[32] =
{ 0,
- 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
- 18, 18, 19, 10, 19, 20, 20, 8, 20, 20,
- 3, 17, 17, 17, 17, 17, 17, 17, 17
+ 1, 1, 1, 1, 1, 1, 1, 1, 5, 15,
+ 5, 19, 19, 20, 11, 20, 21, 21, 8, 21,
+ 21, 3, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18
} ;
/* The intent behind this definition is that it'll catch
@@ -477,7 +481,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
#define YY_NO_INPUT 1
-#line 480 "fts0blex.cc"
+#line 484 "fts0blex.cc"
#define INITIAL 0
@@ -575,11 +579,11 @@ extern int fts0bwrap (yyscan_t yyscanner );
#endif
#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)));
+static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)));
+static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifndef YY_NO_INPUT
@@ -699,12 +703,12 @@ YY_DECL
register yy_state_type yy_current_state;
register char *yy_cp, *yy_bp;
register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
#line 43 "fts0blex.l"
-#line 707 "fts0blex.cc"
+#line 711 "fts0blex.cc"
if ( !yyg->yy_init )
{
@@ -757,13 +761,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 17 );
+ while ( yy_current_state != 18 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -835,7 +839,7 @@ YY_RULE_SETUP
#line 73 "fts0blex.l"
ECHO;
YY_BREAK
-#line 838 "fts0blex.cc"
+#line 842 "fts0blex.cc"
case YY_STATE_EOF(INITIAL):
yyterminate();
@@ -978,7 +982,7 @@ case YY_STATE_EOF(INITIAL):
*/
static int yy_get_next_buffer (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
register char *source = yyg->yytext_ptr;
register int number_to_move, i;
@@ -1044,9 +1048,9 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- fts0brealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
+ fts0brealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
}
else
/* Can't grow it, we don't own it. */
@@ -1095,7 +1099,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) fts0brealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0brealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -1115,7 +1119,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
register yy_state_type yy_current_state;
register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_current_state = yyg->yy_start;
@@ -1130,7 +1134,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1147,7 +1151,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
{
register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner; /* This var may be unused depending upon options. */
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
register char *yy_cp = yyg->yy_c_buf_p;
register YY_CHAR yy_c = 1;
@@ -1159,11 +1163,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 17);
+ yy_is_jam = (yy_current_state == 18);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -1177,7 +1181,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
int c;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
*yyg->yy_c_buf_p = yyg->yy_hold_char;
@@ -1235,7 +1239,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
}
}
- c = *(unsigned char*) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
*yyg->yy_c_buf_p = '\0'; /* preserve yytext */
yyg->yy_hold_char = *++yyg->yy_c_buf_p;
@@ -1250,7 +1254,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0brestart (FILE * input_file , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! YY_CURRENT_BUFFER ){
fts0bensure_buffer_stack (yyscanner);
@@ -1268,7 +1272,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* TODO. We should be able to replace this entire function body
* with
@@ -1300,7 +1304,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static void fts0b_load_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
@@ -1316,7 +1320,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
YY_BUFFER_STATE fts0b_create_buffer (FILE * file, int size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
@@ -1326,7 +1330,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) fts0balloc(b->yy_buf_size + 2 ,yyscanner );
+ b->yy_ch_buf = (char *) fts0balloc(b->yy_buf_size + 2 ,yyscanner );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
@@ -1343,7 +1347,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0b_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1352,9 +1356,9 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- fts0bfree((void*) b->yy_ch_buf ,yyscanner );
+ fts0bfree((void *) b->yy_ch_buf ,yyscanner );
- fts0bfree((void*) b ,yyscanner );
+ fts0bfree((void *) b ,yyscanner );
}
/* Initializes or reinitializes a buffer.
@@ -1365,7 +1369,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
{
int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
fts0b_flush_buffer(b ,yyscanner);
@@ -1382,7 +1386,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
}
b->yy_is_interactive = 0;
-
+
errno = oerrno;
}
@@ -1392,7 +1396,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0b_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1422,7 +1426,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (new_buffer == NULL)
return;
@@ -1453,7 +1457,7 @@ void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
*/
void fts0bpop_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!YY_CURRENT_BUFFER)
return;
@@ -1474,7 +1478,7 @@ void fts0bpop_buffer_state (yyscan_t yyscanner)
static void fts0bensure_buffer_stack (yyscan_t yyscanner)
{
int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!yyg->yy_buffer_stack) {
@@ -1483,14 +1487,14 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0balloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0balloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" );
-
+
memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
yyg->yy_buffer_stack_max = num_to_alloc;
yyg->yy_buffer_stack_top = 0;
return;
@@ -1502,7 +1506,7 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0brealloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0brealloc
(yyg->yy_buffer_stack,
num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
@@ -1519,12 +1523,12 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
* @param base the character buffer
* @param size the size in bytes of the character buffer
* @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
@@ -1560,7 +1564,7 @@ YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yysc
*/
YY_BUFFER_STATE fts0b_scan_string (yyconst char * yystr , yyscan_t yyscanner)
{
-
+
return fts0b_scan_bytes(yystr,strlen(yystr) ,yyscanner);
}
@@ -1577,10 +1581,10 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
char *buf;
yy_size_t n;
int i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
- buf = (char*) fts0balloc(n ,yyscanner );
+ buf = (char *) fts0balloc(n ,yyscanner );
if ( ! buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_bytes()" );
@@ -1605,7 +1609,7 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
#define YY_EXIT_FAILURE 2
#endif
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)))
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
(void) fprintf( stderr, "%s\n", msg );
exit( YY_EXIT_FAILURE );
@@ -1635,7 +1639,7 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute_
*/
YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyextra;
}
@@ -1644,11 +1648,11 @@ YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner)
*/
int fts0bget_lineno (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
-
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yylineno;
}
@@ -1657,11 +1661,11 @@ int fts0bget_lineno (yyscan_t yyscanner)
*/
int fts0bget_column (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
-
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yycolumn;
}
@@ -1670,7 +1674,7 @@ int fts0bget_column (yyscan_t yyscanner)
*/
FILE *fts0bget_in (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyin;
}
@@ -1679,7 +1683,7 @@ FILE *fts0bget_in (yyscan_t yyscanner)
*/
FILE *fts0bget_out (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyout;
}
@@ -1688,7 +1692,7 @@ FILE *fts0bget_out (yyscan_t yyscanner)
*/
int fts0bget_leng (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyleng;
}
@@ -1698,7 +1702,7 @@ int fts0bget_leng (yyscan_t yyscanner)
char *fts0bget_text (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yytext;
}
@@ -1708,7 +1712,7 @@ char *fts0bget_text (yyscan_t yyscanner)
*/
void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyextra = user_defined ;
}
@@ -1718,12 +1722,12 @@ void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
*/
void fts0bset_lineno (int line_number , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* lineno is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner);
-
+ yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner);
+
yylineno = line_number;
}
@@ -1733,12 +1737,12 @@ void fts0bset_lineno (int line_number , yyscan_t yyscanner)
*/
void fts0bset_column (int column_no , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* column is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner);
-
+ yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner);
+
yycolumn = column_no;
}
@@ -1750,25 +1754,25 @@ void fts0bset_column (int column_no , yyscan_t yyscanner)
*/
void fts0bset_in (FILE * in_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyin = in_str ;
}
void fts0bset_out (FILE * out_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyout = out_str ;
}
int fts0bget_debug (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yy_flex_debug;
}
void fts0bset_debug (int bdebug , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_flex_debug = bdebug ;
}
@@ -1821,26 +1825,26 @@ int fts0blex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = EINVAL;
return 1;
}
-
+
*ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
+
if (*ptr_yy_globals == NULL){
errno = ENOMEM;
return 1;
}
-
+
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
+
fts0bset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
static int yy_init_globals (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Initialization is the same as for the non-reentrant scanner.
* This function is called from fts0blex_destroy(), so don't allocate here.
*/
@@ -1848,7 +1852,7 @@ static int yy_init_globals (yyscan_t yyscanner)
yyg->yy_buffer_stack = 0;
yyg->yy_buffer_stack_top = 0;
yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char*) 0;
+ yyg->yy_c_buf_p = (char *) 0;
yyg->yy_init = 0;
yyg->yy_start = 0;
@@ -1861,8 +1865,8 @@ static int yy_init_globals (yyscan_t yyscanner)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -1874,7 +1878,7 @@ static int yy_init_globals (yyscan_t yyscanner)
/* fts0blex_destroy is for both reentrant and non-reentrant scanners. */
int fts0blex_destroy (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
@@ -1906,7 +1910,7 @@ int fts0blex_destroy (yyscan_t yyscanner)
*/
#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)))
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int i;
for ( i = 0; i < n; ++i )
@@ -1915,7 +1919,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yysc
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)))
+static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int n;
for ( n = 0; s[n]; ++n )
@@ -1925,26 +1929,26 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__(
}
#endif
-void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
-void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
-void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)))
+void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- free( (char*) ptr ); /* see fts0brealloc() for (char*) cast */
+ free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l
index b84b0cea294..6193f0df187 100644
--- a/storage/innobase/fts/fts0blex.l
+++ b/storage/innobase/fts/fts0blex.l
@@ -56,7 +56,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
return(FTS_NUMB);
}
-[^" \n*()+\-<>~@]* {
+[^" \n*()+\-<>~@%]* {
val->token = strdup(fts0bget_text(yyscanner));
return(FTS_TERM);
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
index 3f849ef183c..9cac680101c 100644
--- a/storage/innobase/fts/fts0config.cc
+++ b/storage/innobase/fts/fts0config.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,7 +70,7 @@ Get value from the config table. The caller must ensure that enough
space is allocated for value to hold the column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -83,7 +83,7 @@ fts_config_get_value(
{
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
ulint name_len = strlen(name);
info = pars_info_create();
@@ -162,7 +162,7 @@ must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -173,7 +173,7 @@ fts_config_get_index_value(
config table */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
@@ -193,7 +193,7 @@ fts_config_get_index_value(
Set the value in the config table for name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -206,7 +206,7 @@ fts_config_set_value(
{
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
undo_no_t undo_no;
undo_no_t n_rows_updated;
ulint name_len = strlen(name);
@@ -262,7 +262,7 @@ fts_config_set_value(
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -273,7 +273,7 @@ fts_config_set_index_value(
config table */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
@@ -293,7 +293,7 @@ fts_config_set_index_value(
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
@@ -301,7 +301,7 @@ fts_config_get_index_ulint(
const char* name, /*!< in: param name */
ulint* int_value) /*!< out: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -314,8 +314,8 @@ fts_config_get_index_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
+ ut_strerr(error), name);
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -329,7 +329,7 @@ fts_config_get_index_ulint(
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
@@ -337,7 +337,7 @@ fts_config_set_index_ulint(
const char* name, /*!< in: param name */
ulint int_value) /*!< in: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -356,8 +356,8 @@ fts_config_set_index_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) writing `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
+ ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -369,7 +369,7 @@ fts_config_set_index_ulint(
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -378,7 +378,7 @@ fts_config_get_ulint(
const char* name, /*!< in: param name */
ulint* int_value) /*!< out: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -391,8 +391,8 @@ fts_config_get_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
+ ut_strerr(error), name);
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -406,7 +406,7 @@ fts_config_get_ulint(
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -415,7 +415,7 @@ fts_config_set_ulint(
const char* name, /*!< in: param name */
ulint int_value) /*!< in: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -434,8 +434,8 @@ fts_config_set_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) writing `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
+ ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -447,7 +447,7 @@ fts_config_set_ulint(
Increment the value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -458,7 +458,7 @@ fts_config_increment_value(
ulint delta) /*!< in: increment by this
much */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
que_t* graph = NULL;
ulint name_len = strlen(name);
@@ -520,8 +520,8 @@ fts_config_increment_value(
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while incrementing %s.\n", error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while incrementing %s.\n", ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -533,7 +533,7 @@ fts_config_increment_value(
Increment the per index value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_index_value(
/*=============================*/
trx_t* trx, /*!< transaction */
@@ -544,7 +544,7 @@ fts_config_increment_index_value(
much */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index f716b980501..a81d3043e9c 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,12 +36,8 @@ Full Text Search interface
#include "dict0priv.h"
#include "dict0stats.h"
#include "btr0pcur.h"
-#include "row0row.h"
-#include "ha_prototypes.h"
-#ifdef UNIV_NONINL
-#include "fts0priv.ic"
-#endif
+#include "ha_prototypes.h"
#define FTS_MAX_ID_LEN 32
@@ -63,9 +59,6 @@ UNIV_INTERN ulong fts_min_token_size;
ib_time_t elapsed_time = 0;
ulint n_nodes = 0;
-typedef struct fts_schema_struct fts_schema_t;
-typedef struct fts_sys_table_struct fts_sys_table_t;
-
/** Error condition reported by fts_utf8_decode() */
const ulint UTF8_ERROR = 0xFFFFFFFF;
@@ -142,7 +135,7 @@ const char *fts_default_stopword[] =
};
/** For storing table info when checking for orphaned tables. */
-struct fts_sys_table_struct {
+struct fts_aux_table_t {
table_id_t id; /*!< Table id */
table_id_t parent_id; /*!< Parent table id */
table_id_t index_id; /*!< Table FT index id */
@@ -246,7 +239,7 @@ static const char* fts_config_table_insert_values_sql =
FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
""
"INSERT INTO %s VALUES ('"
- FTS_SYNCED_DOC_ID "', '1');\n"
+ FTS_SYNCED_DOC_ID "', '0');\n"
""
"INSERT INTO %s VALUES ('"
FTS_TOTAL_DELETED_COUNT "', '0');\n"
@@ -257,12 +250,13 @@ static const char* fts_config_table_insert_values_sql =
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+@return DB_SUCCESS if all OK */
static
-ulint
+dberr_t
fts_sync(
/*=====*/
- fts_sync_t* sync); /*!< in: sync state */
+ fts_sync_t* sync) /*!< in: sync state */
+ __attribute__((nonnull));
/****************************************************************//**
Release all resources help by the words rb tree e.g., the node ilist. */
@@ -270,7 +264,8 @@ static
void
fts_words_free(
/*===========*/
- ib_rbt_t* words); /*!< in: rb tree of words */
+ ib_rbt_t* words) /*!< in: rb tree of words */
+ __attribute__((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. */
@@ -294,19 +289,35 @@ fts_add_doc_by_id(
doc_id_t doc_id, /*!< in: doc id */
ib_vector_t* fts_indexes __attribute__((unused)));
/*!< in: affected fts indexes */
+#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
@return DB_SUCCESS if all went fine */
static
-ulint
+dberr_t
fts_is_word_in_index(
/*=================*/
trx_t* trx, /*!< in: FTS query state */
que_t** graph, /*!< out: Query graph */
fts_table_t* fts_table, /*!< in: table instance */
const fts_string_t* word, /*!< in: the word to check */
- ibool* found); /*!< out: TRUE if exists */
+ ibool* found) /*!< out: TRUE if exists */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* FTS_DOC_STATS_DEBUG */
+/******************************************************************//**
+Update the last document id. This function could create a new
+transaction to update the last document id.
+@return DB_SUCCESS if OK */
+static
+dberr_t
+fts_update_sync_doc_id(
+/*===================*/
+ const dict_table_t* table, /*!< in: table */
+ const char* table_name, /*!< in: table name, or NULL */
+ doc_id_t doc_id, /*!< in: last document id */
+ trx_t* trx) /*!< in: update trx, or NULL */
+ __attribute__((nonnull(1)));
/********************************************************************
Check if we should stop. */
UNIV_INLINE
@@ -443,7 +454,7 @@ fts_load_user_stopword(
{
pars_info_t* info;
que_t* graph;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool ret = TRUE;
trx_t* trx;
ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
@@ -507,9 +518,9 @@ fts_load_user_stopword(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error '%s' "
"while reading user stopword table.\n",
- error);
+ ut_strerr(error));
ret = FALSE;
break;
}
@@ -542,7 +553,7 @@ fts_index_cache_init(
index_cache->words = rbt_create_arg_cmp(
sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
- index_cache->charset);
+ (void*) index_cache->charset);
ut_a(index_cache->doc_stats == NULL);
@@ -670,7 +681,7 @@ fts_add_index(
ib_vector_push(fts->indexes, &index);
- index_cache = (fts_index_cache_t*) fts_find_index_cache(cache, index);
+ index_cache = fts_find_index_cache(cache, index);
if (!index_cache) {
/* Add new index cache structure */
@@ -805,7 +816,7 @@ fts_check_cached_index(
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index(
/*===========*/
dict_table_t* table, /*!< in: Table where indexes are dropped */
@@ -813,7 +824,7 @@ fts_drop_index(
trx_t* trx) /*!< in: Transaction for the drop */
{
ib_vector_t* indexes = table->fts->indexes;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_a(indexes);
@@ -821,6 +832,8 @@ fts_drop_index(
&& (index == static_cast<dict_index_t*>(
ib_vector_getp(table->fts->indexes, 0))))
|| ib_vector_is_empty(indexes)) {
+ doc_id_t current_doc_id;
+ doc_id_t first_doc_id;
/* If we are dropping the only FTS index of the table,
remove it from optimize thread */
@@ -844,17 +857,20 @@ fts_drop_index(
return(err);
}
+ current_doc_id = table->fts->cache->next_doc_id;
+ first_doc_id = table->fts->cache->first_doc_id;
fts_cache_clear(table->fts->cache, TRUE);
fts_cache_destroy(table->fts->cache);
table->fts->cache = fts_cache_create(table);
+ table->fts->cache->next_doc_id = current_doc_id;
+ table->fts->cache->first_doc_id = first_doc_id;
} else {
fts_cache_t* cache = table->fts->cache;
fts_index_cache_t* index_cache;
rw_lock_x_lock(&cache->init_lock);
- index_cache = (fts_index_cache_t*) fts_find_index_cache(
- cache, index);
+ index_cache = fts_find_index_cache(cache, index);
if (index_cache->words) {
fts_words_free(index_cache->words);
@@ -1215,7 +1231,7 @@ fts_tokenizer_word_get(
if (rbt_search(cache->stopword_info.cached_stopword,
&parent, text) == 0) {
- return NULL;
+ return(NULL);
}
/* Check if we found a match, if not then add word to tree. */
@@ -1445,38 +1461,40 @@ fts_cache_add_doc(
/****************************************************************//**
Drops a table. If the table can't be found we return a SUCCESS code.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_table(
/*===========*/
trx_t* trx, /*!< in: transaction */
const char* table_name) /*!< in: table to drop */
{
- ulint error = DB_SUCCESS;
+ dict_table_t* table;
+ dberr_t error = DB_SUCCESS;
- /* Check that the table exists in our data dictionary. */
- if (dict_table_get_low(table_name)) {
+ /* Check that the table exists in our data dictionary.
+ Similar to regular drop table case, we will open table with
+ DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
-#ifdef FTS_INTERNAL_DIAG_PRINT
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Dropping %s\n", table_name);
-#endif
+ if (table != 0) {
- error = row_drop_table_for_mysql(table_name, trx, TRUE);
+ dict_table_close(table, TRUE, FALSE);
+
+ /* Pass nonatomic=false (dont allow data dict unlock),
+ because the transaction may hold locks on SYS_* tables from
+ previous calls to fts_drop_table(). */
+ error = row_drop_table_for_mysql(table_name, trx, true, false);
- /* We only return the status of the last error. */
if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) dropping "
- "FTS index table %s\n", error, table_name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to drop FTS index aux table %s: %s",
+ table_name, ut_strerr(error));
}
} else {
- ut_print_timestamp(stderr);
-
- /* FIXME: Should provide appropriate error return code
- rather than printing message indiscriminately. */
- fprintf(stderr, " InnoDB: %s not found.\n",
- table_name);
+ error = DB_FAIL;
}
return(error);
@@ -1487,8 +1505,8 @@ Drops the common ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_common_tables(
/*===================*/
trx_t* trx, /*!< in: transaction */
@@ -1496,10 +1514,10 @@ fts_drop_common_tables(
index */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
for (i = 0; fts_common_tables[i] != NULL; ++i) {
- ulint err;
+ dberr_t err;
char* table_name;
fts_table->suffix = fts_common_tables[i];
@@ -1509,7 +1527,7 @@ fts_drop_common_tables(
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1520,11 +1538,11 @@ fts_drop_common_tables(
}
/****************************************************************//**
-Since we do a horizontal split on the index table, we need to drop the
+Since we do a horizontal split on the index table, we need to drop
all the split tables.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_split_tables(
/*========================*/
trx_t* trx, /*!< in: transaction */
@@ -1533,12 +1551,12 @@ fts_drop_index_split_tables(
{
ulint i;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
for (i = 0; fts_index_selector[i].value; ++i) {
- ulint err;
+ dberr_t err;
char* table_name;
fts_table.suffix = fts_get_suffix(i);
@@ -1548,7 +1566,7 @@ fts_drop_index_split_tables(
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1562,23 +1580,21 @@ fts_drop_index_split_tables(
Drops FTS auxiliary tables for an FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index) /*!< in: Index to drop */
{
- ulint err;
- ulint error = DB_SUCCESS;
fts_table_t fts_table;
- ulint j;
+ dberr_t error = DB_SUCCESS;
static const char* index_tables[] = {
"DOC_ID",
NULL
};
- err = fts_drop_index_split_tables(trx, index);
+ dberr_t err = fts_drop_index_split_tables(trx, index);
/* We only return the status of the last error. */
if (err != DB_SUCCESS) {
@@ -1587,18 +1603,17 @@ fts_drop_index_tables(
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
- for (j = 0; index_tables[j] != NULL; ++j) {
- ulint err;
+ for (ulint i = 0; index_tables[i] != NULL; ++i) {
char* table_name;
- fts_table.suffix = index_tables[j];
+ fts_table.suffix = index_tables[i];
table_name = fts_get_table_name(&fts_table);
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1613,18 +1628,20 @@ Drops FTS ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_all_index_tables(
/*======================*/
trx_t* trx, /*!< in: transaction */
fts_t* fts) /*!< in: fts instance */
{
- ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
- for (i = 0; i < ib_vector_size(fts->indexes); ++i) {
- ulint err;
+ for (ulint i = 0;
+ fts->indexes != 0 && i < ib_vector_size(fts->indexes);
+ ++i) {
+
+ dberr_t err;
dict_index_t* index;
index = static_cast<dict_index_t*>(
@@ -1646,17 +1663,19 @@ given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_tables(
/*============*/
trx_t* trx, /*!< in: transaction */
dict_table_t* table) /*!< in: table has the FTS index */
{
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+ /* TODO: This is not atomic and can cause problems during recovery. */
+
error = fts_drop_common_tables(trx, &fts_table);
if (error == DB_SUCCESS) {
@@ -1692,20 +1711,20 @@ on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS if succeed */
UNIV_INTERN
-ulint
+dberr_t
fts_create_common_tables(
/*=====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table with FTS index */
- const char* name, /*!< in: table name normalized.*/
- ibool skip_doc_id_index) /*!< in: Skip index on doc id */
-
+ trx_t* trx, /*!< in: transaction */
+ const dict_table_t* table, /*!< in: table with FTS index */
+ const char* name, /*!< in: table name normalized.*/
+ bool skip_doc_id_index)/*!< in: Skip index on doc id */
{
char* sql;
- ulint error;
+ dberr_t error;
que_t* graph;
fts_table_t fts_table;
mem_heap_t* heap = mem_heap_create(1024);
+ pars_info_t* info;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
@@ -1744,17 +1763,23 @@ fts_create_common_tables(
goto func_exit;
}
+ info = pars_info_create();
+
+ pars_info_bind_id(info, TRUE, "table_name", name);
+ pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
+ pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
+
/* Create the FTS DOC_ID index on the hidden column. Currently this
is common for any FT index created on the table. */
graph = fts_parse_sql_no_dict_lock(
NULL,
- NULL,
+ info,
mem_heap_printf(
heap,
"BEGIN\n"
""
- "CREATE UNIQUE INDEX %s ON %s(%s);\n",
- FTS_DOC_ID_INDEX_NAME, name, FTS_DOC_ID_COL_NAME));
+ "CREATE UNIQUE INDEX $index_name ON $table_name("
+ "$doc_id_col_name);\n"));
error = fts_eval_sql(trx, graph);
que_graph_free(graph);
@@ -1794,7 +1819,7 @@ fts_create_one_index_table(
dict_field_t* field;
dict_table_t* new_table = NULL;
char* table_name = fts_get_table_name(fts_table);
- ulint error;
+ dberr_t error;
CHARSET_INFO* charset;
ut_ad(index->type & DICT_FTS);
@@ -1828,14 +1853,14 @@ fts_create_one_index_table(
dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
4130048, 0);
- error = row_create_table_for_mysql(new_table, trx);
+ error = row_create_table_for_mysql(new_table, trx, true);
if (error != DB_SUCCESS) {
- trx->error_state = static_cast<db_err>(error);
+ trx->error_state = error;
dict_mem_table_free(new_table);
new_table = NULL;
- fprintf(stderr, " InnoDB: Warning: Fail to create FTS "
- " index table %s \n", table_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Fail to create FTS index table %s", table_name);
}
mem_free(table_name);
@@ -1848,7 +1873,7 @@ Wrapper function of fts_create_index_tables_low(), create auxiliary
tables for an FTS index
@return: DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables_low(
/*========================*/
trx_t* trx, /*!< in: transaction */
@@ -1862,7 +1887,7 @@ fts_create_index_tables_low(
char* sql;
que_t* graph;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
mem_heap_t* heap = mem_heap_create(1024);
fts_table.type = FTS_INDEX_TABLE;
@@ -1874,6 +1899,7 @@ fts_create_index_tables_low(
/* Create the FTS auxiliary tables that are specific
to an FTS index. */
sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
+
graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
mem_free(sql);
@@ -1903,9 +1929,7 @@ fts_create_index_tables_low(
que_graph_free(graph);
}
- if (error == DB_SUCCESS) {
- error = fts_sql_commit(trx);
- } else {
+ if (error != DB_SUCCESS) {
/* We have special error handling here */
trx->error_state = DB_SUCCESS;
@@ -1928,18 +1952,25 @@ FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables(
/*====================*/
trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: the index instance */
{
+ dberr_t err;
dict_table_t* table;
table = dict_table_get_low(index->table_name);
ut_a(table != NULL);
- return(fts_create_index_tables_low(trx, index, table->name, table->id));
+ err = fts_create_index_tables_low(trx, index, table->name, table->id);
+
+ if (err == DB_SUCCESS) {
+ trx_commit(trx);
+ }
+
+ return(err);
}
#if 0
/******************************************************************//**
@@ -1953,22 +1984,22 @@ fts_get_state_str(
{
switch (state) {
case FTS_INSERT:
- return "INSERT";
+ return("INSERT");
case FTS_MODIFY:
- return "MODIFY";
+ return("MODIFY");
case FTS_DELETE:
- return "DELETE";
+ return("DELETE");
case FTS_NOTHING:
- return "NOTHING";
+ return("NOTHING");
case FTS_INVALID:
- return "INVALID";
+ return("INVALID");
default:
- return "UNKNOWN";
+ return("UNKNOWN");
}
}
#endif
@@ -2321,7 +2352,7 @@ fts_get_max_cache_size(
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table) /*!< in: table instance */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
ulint cache_size_in_mb;
@@ -2381,32 +2412,19 @@ fts_get_max_cache_size(
}
#endif
-/*********************************************************************//**
-Get the total number of documents in the FTS.
-@return estimated number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_total_document_count(
-/*=========================*/
- dict_table_t* table) /*!< in: table instance */
-{
- ut_ad(table->stat_initialized);
-
- return((ulint) table->stat_n_rows);
-}
-
+#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_get_total_word_count(
/*=====================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: for this index */
ulint* total) /* out: total words */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
*total = 0;
@@ -2426,14 +2444,15 @@ fts_get_total_word_count(
*total = strtoul((char*) value.f_str, NULL, 10);
} else {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading total words "
- "value from config table\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) reading total words "
+ "value from config table\n", ut_strerr(error));
}
ut_free(value.f_str);
return(error);
}
+#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
@@ -2443,8 +2462,9 @@ UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
+ const char* table_name, /*!< in: table name, or NULL */
doc_id_t doc_id) /*!< in: DOC ID to set */
{
table->fts->cache->synced_doc_id = doc_id;
@@ -2453,7 +2473,7 @@ fts_update_next_doc_id(
table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
fts_update_sync_doc_id(
- table, table_name, table->fts->cache->synced_doc_id, NULL);
+ table, table_name, table->fts->cache->synced_doc_id, trx);
}
@@ -2461,7 +2481,7 @@ fts_update_next_doc_id(
Get the next available document id.
@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_get_next_doc_id(
/*================*/
const dict_table_t* table, /*!< in: table */
@@ -2494,8 +2514,8 @@ fts_get_next_doc_id(
This function fetch the Doc ID from CONFIG table, and compare with
the Doc ID supplied. And store the larger one to the CONFIG table.
@return DB_SUCCESS if OK */
-UNIV_INTERN
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_cmp_set_sync_doc_id(
/*====================*/
const dict_table_t* table, /*!< in: table */
@@ -2509,7 +2529,7 @@ fts_cmp_set_sync_doc_id(
{
trx_t* trx;
pars_info_t* info;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
que_t* graph = NULL;
fts_cache_t* cache = table->fts->cache;
@@ -2559,8 +2579,6 @@ retry:
goto func_exit;
}
- ut_a(*doc_id > 0);
-
if (read_only) {
goto func_exit;
}
@@ -2594,8 +2612,8 @@ func_exit:
*doc_id = 0;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while getting next doc id.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while getting next doc id.\n", ut_strerr(error));
fts_sql_rollback(trx);
@@ -2614,23 +2632,23 @@ func_exit:
Update the last document id. This function could create a new
transaction to update the last document id.
@return DB_SUCCESS if OK */
-UNIV_INTERN
-ulint
+static
+dberr_t
fts_update_sync_doc_id(
/*===================*/
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
+ const char* table_name, /*!< in: table name, or NULL */
doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx) /*!< in: update trx */
+ trx_t* trx) /*!< in: update trx, or NULL */
{
byte id[FTS_MAX_ID_LEN];
pars_info_t* info;
fts_table_t fts_table;
ulint id_len;
que_t* graph = NULL;
- ulint error;
+ dberr_t error;
ibool local_trx = FALSE;
- fts_cache_t* cache = table->fts->cache;;
+ fts_cache_t* cache = table->fts->cache;
fts_table.suffix = "CONFIG";
fts_table.table_id = table->id;
@@ -2651,8 +2669,7 @@ fts_update_sync_doc_id(
info = pars_info_create();
- // FIXME: Get rid of snprintf
- id_len = snprintf(
+ id_len = ut_snprintf(
(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
@@ -2672,9 +2689,10 @@ fts_update_sync_doc_id(
fts_sql_commit(trx);
cache->synced_doc_id = doc_id;
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while updating last doc id.\n", error);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "(%s) while updating last doc id.",
+ ut_strerr(error));
fts_sql_rollback(trx);
}
@@ -2725,15 +2743,15 @@ fts_doc_ids_free(
/*********************************************************************//**
Do commit-phase steps necessary for the insertion of a new row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_add(
/*====*/
fts_trx_table_t*ftt, /*!< in: FTS trx table */
fts_trx_row_t* row) /*!< in: row */
{
dict_table_t* table = ftt->table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t doc_id = row->doc_id;
ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
@@ -2757,8 +2775,8 @@ fts_add(
/*********************************************************************//**
Do commit-phase steps necessary for the deletion of a row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_delete(
/*=======*/
fts_trx_table_t*ftt, /*!< in: FTS trx table */
@@ -2766,7 +2784,7 @@ fts_delete(
{
que_t* graph;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t write_doc_id;
dict_table_t* table = ftt->table;
doc_id_t doc_id = row->doc_id;
@@ -2848,14 +2866,14 @@ fts_delete(
/*********************************************************************//**
Do commit-phase steps necessary for the modification of a row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_modify(
/*=======*/
fts_trx_table_t* ftt, /*!< in: FTS trx table */
fts_trx_row_t* row) /*!< in: row */
{
- ulint error;
+ dberr_t error;
ut_a(row->state == FTS_MODIFY);
@@ -2872,7 +2890,7 @@ fts_modify(
Create a new document id.
@return DB_SUCCESS if all went well else error */
UNIV_INTERN
-ulint
+dberr_t
fts_create_doc_id(
/*==============*/
dict_table_t* table, /*!< in: row is of this table. */
@@ -2882,7 +2900,7 @@ fts_create_doc_id(
mem_heap_t* heap) /*!< in: heap */
{
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_a(table->fts->doc_col != ULINT_UNDEFINED);
@@ -2919,15 +2937,15 @@ fts_create_doc_id(
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_commit_table(
/*=============*/
fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
{
const ib_rbt_node_t* node;
ib_rbt_t* rows;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_cache_t* cache = ftt->table->fts->cache;
trx_t* trx = trx_allocate_for_background();
@@ -2979,13 +2997,13 @@ The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_commit(
/*=======*/
trx_t* trx) /*!< in: transaction */
{
const ib_rbt_node_t* node;
- ulint error;
+ dberr_t error;
ib_rbt_t* tables;
fts_savepoint_t* savepoint;
@@ -3008,10 +3026,9 @@ fts_commit(
}
/*********************************************************************//**
-Create a new empty document.
-@return new document */
+Initialize a document. */
UNIV_INTERN
-fts_doc_t*
+void
fts_doc_init(
/*=========*/
fts_doc_t* doc) /*!< in: doc to initialize */
@@ -3021,8 +3038,6 @@ fts_doc_init(
memset(doc, 0, sizeof(*doc));
doc->self_heap = ib_heap_allocator_create(heap);
-
- return(doc);
}
/*********************************************************************//**
@@ -3075,7 +3090,7 @@ fts_fetch_row_id(
/*********************************************************************//**
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
-@return: always returns FALSE */
+@return always FALSE */
UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
@@ -3467,13 +3482,15 @@ fts_get_max_doc_id(
dfield = dict_index_get_nth_field(index, 0);
+#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
+#endif
mtr_start(&mtr);
/* fetch the largest indexes value */
btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
const rec_t* rec = NULL;
@@ -3516,13 +3533,14 @@ func_exit:
Fetch document with the given document id.
@return DB_SUCCESS if OK else error */
UNIV_INTERN
-ulint
+dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
fts_get_doc_t* get_doc, /*!< in: state */
doc_id_t doc_id, /*!< in: id of document to
fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
ulint option, /*!< in: search option, if it is
greater than doc_id or equal */
fts_sql_callback
@@ -3530,7 +3548,7 @@ fts_doc_fetch_by_doc_id(
void* arg) /*!< in: callback arg */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
const char* select_str;
doc_id_t write_doc_id;
dict_index_t* index;
@@ -3555,6 +3573,7 @@ fts_doc_fetch_by_doc_id(
pars_info_bind_function(info, "my_func", callback, arg);
select_str = fts_get_select_columns_str(index, info, info->heap);
+ pars_info_bind_id(info, TRUE, "table_name", index->table_name);
if (!get_doc || !get_doc->get_document_graph) {
if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
@@ -3564,7 +3583,7 @@ fts_doc_fetch_by_doc_id(
mem_heap_printf(info->heap,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT %s FROM %s"
+ " SELECT %s FROM $table_name"
" WHERE %s = :doc_id;\n"
"BEGIN\n"
""
@@ -3576,20 +3595,32 @@ fts_doc_fetch_by_doc_id(
" END IF;\n"
"END LOOP;\n"
"CLOSE c;",
- select_str, index->table_name,
- FTS_DOC_ID_COL_NAME));
+ select_str, FTS_DOC_ID_COL_NAME));
} else {
ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
+ /* This is used for crash recovery of table with
+ hidden DOC ID or FTS indexes. We will scan the table
+ to re-processing user table rows whose DOC ID or
+ FTS indexed documents have not been sync-ed to disc
+ during recent crash.
+ In the case that all fulltext indexes are dropped
+ for a table, we will keep the "hidden" FTS_DOC_ID
+ column, and this scan is to retreive the largest
+ DOC ID being used in the table to determine the
+ appropriate next DOC ID.
+ In the case of there exists fulltext index(es), this
+ operation will re-tokenize any docs that have not
+ been sync-ed to the disk, and re-prime the FTS
+ cached */
graph = fts_parse_sql(
NULL,
info,
mem_heap_printf(info->heap,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT %s, %s FROM %s"
- " WHERE %s > :doc_id"
- " ORDER BY %s;\n"
+ " SELECT %s, %s FROM $table_name"
+ " WHERE %s > :doc_id;\n"
"BEGIN\n"
""
"OPEN c;\n"
@@ -3601,9 +3632,7 @@ fts_doc_fetch_by_doc_id(
"END LOOP;\n"
"CLOSE c;",
FTS_DOC_ID_COL_NAME,
- select_str, index->table_name,
- FTS_DOC_ID_COL_NAME,
- FTS_DOC_ID_COL_NAME));
+ select_str, FTS_DOC_ID_COL_NAME));
}
if (get_doc) {
get_doc->get_document_graph = graph;
@@ -3633,7 +3662,7 @@ fts_doc_fetch_by_doc_id(
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
UNIV_INTERN
-ulint
+dberr_t
fts_write_node(
/*===========*/
trx_t* trx, /*!< in: transaction */
@@ -3643,7 +3672,7 @@ fts_write_node(
fts_node_t* node) /*!< in: node columns */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
ib_uint32_t doc_count;
ib_time_t start_time;
doc_id_t last_doc_id;
@@ -3698,8 +3727,8 @@ fts_write_node(
/*********************************************************************//**
Add rows to the DELETED_CACHE table.
@return DB_SUCCESS if all went well else error code*/
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_add_deleted_cache(
/*=======================*/
fts_sync_t* sync, /*!< in: sync state */
@@ -3710,7 +3739,7 @@ fts_sync_add_deleted_cache(
que_t* graph;
fts_table_t fts_table;
doc_id_t dummy = 0;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint n_elems = ib_vector_size(doc_ids);
ut_a(ib_vector_size(doc_ids) > 0);
@@ -3748,9 +3777,10 @@ fts_sync_add_deleted_cache(
}
/*********************************************************************//**
-Write the words and ilist to disk.*/
-static
-ulint
+Write the words and ilist to disk.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_write_words(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -3761,10 +3791,12 @@ fts_sync_write_words(
ulint n_nodes = 0;
ulint n_words = 0;
const ib_rbt_node_t* rbt_node;
- ulint n_new_words = 0;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool print_error = FALSE;
+#ifdef FTS_DOC_STATS_DEBUG
dict_table_t* table = index_cache->index->table;
+ ulint n_new_words = 0;
+#endif /* FTS_DOC_STATS_DEBUG */
FTS_INIT_INDEX_TABLE(
&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
@@ -3789,9 +3821,10 @@ fts_sync_write_words(
fts_table.suffix = fts_get_suffix(selected);
+#ifdef FTS_DOC_STATS_DEBUG
/* Check if the word exists in the FTS index and if not
then we need to increment the total word count stats. */
- if (error == DB_SUCCESS) {
+ if (error == DB_SUCCESS && fts_enable_diag_print) {
ibool found = FALSE;
error = fts_is_word_in_index(
@@ -3805,6 +3838,7 @@ fts_sync_write_words(
++n_new_words;
}
}
+#endif /* FTS_DOC_STATS_DEBUG */
n_nodes += ib_vector_size(word->nodes);
@@ -3829,9 +3863,9 @@ fts_sync_write_words(
if (error != DB_SUCCESS && !print_error) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%lu) writing "
+ fprintf(stderr, " InnoDB: Error (%s) writing "
"word node to FTS auxiliary index "
- "table.\n", error);
+ "table.\n", ut_strerr(error));
print_error = TRUE;
}
@@ -3840,19 +3874,23 @@ fts_sync_write_words(
ut_free(rbt_remove_node(index_cache->words, rbt_node));
}
- if (error == DB_SUCCESS && n_new_words > 0) {
+#ifdef FTS_DOC_STATS_DEBUG
+ if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
/* Increment the total number of words in the FTS index */
- fts_config_increment_index_value(
+ error = fts_config_increment_index_value(
trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
n_new_words);
}
+#endif /* FTS_DOC_STATS_DEBUG */
- printf("Avg number of nodes: %lf\n",
- (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
+ if (fts_enable_diag_print) {
+ printf("Avg number of nodes: %lf\n",
+ (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
+ }
return(error);
}
@@ -3861,8 +3899,8 @@ fts_sync_write_words(
/*********************************************************************//**
Write a single documents statistics to disk.
@return DB_SUCCESS if all went well else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_write_doc_stat(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -3872,7 +3910,7 @@ fts_sync_write_doc_stat(
{
pars_info_t* info;
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ib_uint32_t word_count;
if (*graph) {
@@ -3918,9 +3956,9 @@ fts_sync_write_doc_stat(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while writing to FTS doc_id.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -3940,7 +3978,7 @@ fts_sync_write_doc_stats(
trx_t* trx, /*!< in: transaction */
const fts_index_cache_t*index_cache) /*!< in: index cache */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
que_t* graph = NULL;
fts_doc_stats_t* doc_stat;
@@ -3973,7 +4011,6 @@ fts_sync_write_doc_stats(
return(error);
}
-#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Callback to check the existince of a word.
@@ -4007,13 +4044,12 @@ fts_lookup_word(
}
/*********************************************************************//**
-Check whether a particular word (term) exists in the FTS index. */
+Check whether a particular word (term) exists in the FTS index.
+@return DB_SUCCESS if all went well else error code */
static
-ulint
+dberr_t
fts_is_word_in_index(
/*=================*/
- /* out: DB_SUCCESS if all went
- well else error code */
trx_t* trx, /*!< in: FTS query state */
que_t** graph, /* out: Query graph */
fts_table_t* fts_table, /*!< in: table instance */
@@ -4022,7 +4058,7 @@ fts_is_word_in_index(
ibool* found) /* out: TRUE if exists */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
trx->op_info = "looking up word in FTS index";
@@ -4073,8 +4109,9 @@ fts_is_word_in_index(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while reading FTS index.\n",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -4083,6 +4120,7 @@ fts_is_word_in_index(
return(error);
}
+#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Begin Sync, create transaction, acquire locks, etc. */
@@ -4101,29 +4139,36 @@ fts_sync_begin(
sync->trx = trx_allocate_for_background();
- ut_print_timestamp(stderr);
- fprintf(stderr, " SYNC deleted count: %ld size: %lu bytes\n",
- ib_vector_size(cache->deleted_doc_ids), cache->total_size);
+ if (fts_enable_diag_print) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "FTS SYNC for table %s, deleted count: %ld size: "
+ "%lu bytes",
+ sync->table->name,
+ ib_vector_size(cache->deleted_doc_ids),
+ cache->total_size);
+ }
}
/*********************************************************************//**
Run SYNC on the table, i.e., write out data from the index specific
-cache to the FTS aux INDEX table and FTS aux doc id stats table. */
-static
-ulint
+cache to the FTS aux INDEX table and FTS aux doc id stats table.
+@return DB_SUCCESS if all OK */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_index(
/*===========*/
- /* out: DB_SUCCESS if all OK */
fts_sync_t* sync, /*!< in: sync state */
fts_index_cache_t* index_cache) /*!< in: index cache */
{
trx_t* trx = sync->trx;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
trx->op_info = "doing SYNC index";
- ut_print_timestamp(stderr);
- fprintf(stderr, " SYNC words: %ld\n", rbt_size(index_cache->words));
+ if (fts_enable_diag_print) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "SYNC words: %ld", rbt_size(index_cache->words));
+ }
ut_ad(rbt_validate(index_cache->words));
@@ -4146,13 +4191,13 @@ fts_sync_index(
/*********************************************************************//**
Commit the SYNC, change state of processed doc ids etc.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_commit(
/*============*/
fts_sync_t* sync) /*!< in: sync state */
{
- ulint error;
+ dberr_t error;
trx_t* trx = sync->trx;
fts_cache_t* cache = sync->table->fts->cache;
doc_id_t last_doc_id;
@@ -4191,13 +4236,18 @@ fts_sync_commit(
fts_sql_rollback(trx);
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) during SYNC.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n",
+ ut_strerr(error));
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: SYNC time : %lusecs: elapsed %lf ins/sec\n",
- (ulong) (ut_time() - sync->start_time),
- (double) n_nodes/ (double) elapsed_time);
+ if (fts_enable_diag_print && elapsed_time) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "SYNC for table %s: SYNC time : %lu secs: "
+ "elapsed %lf ins/sec",
+ sync->table->name,
+ (ulong) (ut_time() - sync->start_time),
+ (double) n_nodes/ (double) elapsed_time);
+ }
trx_free_for_background(trx);
@@ -4226,13 +4276,13 @@ Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@return DB_SUCCESS if all OK */
static
-ulint
+dberr_t
fts_sync(
/*=====*/
fts_sync_t* sync) /*!< in: sync state */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_cache_t* cache = sync->table->fts->cache;
rw_lock_x_lock(&cache->lock);
@@ -4275,34 +4325,28 @@ fts_sync(
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+FTS auxiliary INDEX table and clear the cache at the end. */
UNIV_INTERN
-ulint
+void
fts_sync_table(
/*===========*/
dict_table_t* table) /*!< in: table */
{
- ulint error = DB_SUCCESS;
-
ut_ad(table->fts);
if (table->fts->cache) {
fts_sync(table->fts->cache->sync);
}
-
- return(error);
}
/********************************************************************
Process next token from document starting at the given position, i.e., add
-the token's start position to the token's list of positions. */
+the token's start position to the token's list of positions.
+@return number of characters handled in this call */
static
ulint
fts_process_token(
/*==============*/
- /* out: number of characters
- handled in this call */
fts_doc_t* doc, /* in/out: document to
tokenize */
fts_doc_t* result, /* out: if provided, save
@@ -4406,7 +4450,7 @@ fts_tokenize_document(
ut_a(doc->charset);
doc->tokens = rbt_create_arg_cmp(
- sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
+ sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset);
for (ulint i = 0; i < doc->text.f_len; i += inc) {
inc = fts_process_token(doc, result, i, 0);
@@ -4473,6 +4517,7 @@ fts_get_docs_create(
memset(get_doc, 0x0, sizeof(*get_doc));
get_doc->index_cache = fts_get_index_cache(cache, *index);
+ get_doc->cache = cache;
/* Must find the index cache. */
ut_a(get_doc->index_cache != NULL);
@@ -4520,11 +4565,14 @@ fts_init_doc_id(
rw_lock_x_lock(&table->fts->cache->lock);
+ /* Return if the table is already initialized for DOC ID */
if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
rw_lock_x_unlock(&table->fts->cache->lock);
return(0);
}
+ DEBUG_SYNC_C("fts_initialize_doc_id");
+
/* Then compare this value with the ID value stored in the CONFIG
table. The larger one will be our new initial Doc ID */
fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
@@ -4591,7 +4639,7 @@ fts_get_rows_count(
trx_t* trx;
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
ulint count = 0;
trx = trx_allocate_for_background();
@@ -4639,9 +4687,9 @@ fts_get_rows_count(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while reading FTS table.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -4678,7 +4726,7 @@ fts_update_max_cache_size(
trx_free_for_background(trx);
}
-#endif
+#endif /* FTS_CACHE_SIZE_DEBUG */
/*********************************************************************//**
Free the modified rows of a table. */
@@ -4861,13 +4909,13 @@ fts_get_doc_id_from_rec(
col_no = dict_col_get_clust_pos(
&table->cols[table->fts->doc_col], clust_index);
+ ut_ad(col_no != ULINT_UNDEFINED);
- /* We have no choice but to cast rec here :-( */
- data = rec_get_nth_field((rec_t*) rec, offsets, col_no, &len);
+ data = rec_get_nth_field(rec, offsets, col_no, &len);
ut_a(len == 8);
- ut_a(len == sizeof(doc_id));
- doc_id = (doc_id_t) mach_read_from_8(data);
+ ut_ad(8 == sizeof(doc_id));
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
return(doc_id);
}
@@ -4876,7 +4924,7 @@ fts_get_doc_id_from_rec(
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
UNIV_INTERN
-const fts_index_cache_t*
+fts_index_cache_t*
fts_find_index_cache(
/*=================*/
const fts_cache_t* cache, /*!< in: cache to search */
@@ -4884,7 +4932,8 @@ fts_find_index_cache(
{
/* We cast away the const because our internal function, takes
non-const cache arg and returns a non-const pointer. */
- return(fts_get_index_cache((fts_cache_t*) cache, index));
+ return(static_cast<fts_index_cache_t*>(
+ fts_get_index_cache((fts_cache_t*) cache, index)));
}
/*********************************************************************//**
@@ -4960,7 +5009,7 @@ fts_cache_append_deleted_doc_ids(
{
ulint i;
- mutex_enter((mutex_t*) &cache->deleted_lock);
+ mutex_enter((ib_mutex_t*) &cache->deleted_lock);
for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
fts_update_t* update;
@@ -4971,7 +5020,7 @@ fts_cache_append_deleted_doc_ids(
ib_vector_push(vector, &update->doc_id);
}
- mutex_exit((mutex_t*) &cache->deleted_lock);
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
}
/*********************************************************************//**
@@ -5043,11 +5092,11 @@ UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
- dict_table_t* table) /*!< in/out: Table with FTS index */
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
{
dict_mem_table_add_col(
- table,
- table->heap,
+ table, heap,
FTS_DOC_ID_COL_NAME,
DATA_INT,
dtype_form_prtype(
@@ -5069,7 +5118,7 @@ fts_update_doc_id(
doc_id_t* next_doc_id) /*!< in/out: buffer for writing */
{
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
if (*next_doc_id) {
doc_id = *next_doc_id;
@@ -5236,13 +5285,12 @@ fts_savepoint_copy(
ftt_dst = fts_trx_table_clone(*ftt_src);
- rbt_insert(dst->tables, &ftt_dst->table->id, &ftt_dst);
+ rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
}
}
/*********************************************************************//**
-Take a FTS savepoint.
-@return DB_SUCCESS or error code */
+Take a FTS savepoint. */
UNIV_INTERN
void
fts_savepoint_take(
@@ -5312,7 +5360,6 @@ fts_savepoint_release(
const char* name) /*!< in: savepoint name */
{
ulint i;
- fts_savepoint_t* prev;
ib_vector_t* savepoints;
ulint top_of_stack = 0;
@@ -5322,9 +5369,6 @@ fts_savepoint_release(
ut_a(ib_vector_size(savepoints) > 0);
- prev = static_cast<fts_savepoint_t*>(
- ib_vector_get(savepoints, top_of_stack));
-
/* Skip the implied savepoint (first element). */
for (i = 1; i < ib_vector_size(savepoints); ++i) {
fts_savepoint_t* savepoint;
@@ -5338,17 +5382,6 @@ fts_savepoint_release(
we have to skip deleted/released entries. */
if (savepoint->name != NULL
&& strcmp(name, savepoint->name) == 0) {
-
- fts_savepoint_t* last;
- fts_savepoint_t temp;
-
- last = static_cast<fts_savepoint_t*>(
- ib_vector_last(savepoints));
-
- /* Swap the entries. */
- memcpy(&temp, last, sizeof(temp));
- memcpy(last, prev, sizeof(*last));
- memcpy(prev, &temp, sizeof(prev));
break;
/* Track the previous savepoint instance that will
@@ -5357,8 +5390,6 @@ fts_savepoint_release(
/* We need to delete all entries
greater than this element. */
top_of_stack = i;
-
- prev = savepoint;
}
}
@@ -5395,8 +5426,7 @@ fts_savepoint_release(
}
/**********************************************************************//**
-Refresh last statement savepoint.
-@return DB_SUCCESS or error code */
+Refresh last statement savepoint. */
UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
@@ -5588,7 +5618,7 @@ static
ibool
fts_is_aux_table_name(
/*==================*/
- fts_sys_table_t*table, /*!< out: table info */
+ fts_aux_table_t*table, /*!< out: table info */
const char* name, /*!< in: table name */
ulint len) /*!< in: length of table name */
{
@@ -5614,7 +5644,6 @@ fts_is_aux_table_name(
if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
ulint i;
-
/* Skip the prefix. */
ptr += 4;
len -= 4;
@@ -5689,7 +5718,7 @@ fts_read_tables(
void* user_arg) /*!< in: pointer to ib_vector_t */
{
int i;
- fts_sys_table_t*table;
+ fts_aux_table_t*table;
mem_heap_t* heap;
ibool done = FALSE;
ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
@@ -5701,7 +5730,7 @@ fts_read_tables(
/* We will use this heap for allocating strings. */
heap = static_cast<mem_heap_t*>(tables->allocator->arg);
- table = static_cast<fts_sys_table_t*>(ib_vector_push(tables, NULL));
+ table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
memset(table, 0x0, sizeof(*table));
@@ -5726,9 +5755,9 @@ fts_read_tables(
}
table->name = static_cast<char*>(
- mem_heap_dup(heap, data, len + 1));
- table->name[len] = '\0';
- printf("Found [%.*s]\n", (int) len, table->name);
+ mem_heap_alloc(heap, len + 1));
+ memcpy(table->name, data, len);
+ table->name[len] = 0;
break;
case 1: /* ID */
@@ -5749,41 +5778,41 @@ fts_read_tables(
Check and drop all orphaned FTS auxiliary tables, those that don't have
a parent table or FTS index defined on them.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull))
+void
fts_check_and_drop_orphaned_tables(
/*===============================*/
trx_t* trx, /*!< in: transaction */
ib_vector_t* tables) /*!< in: tables to check */
{
- ulint i;
- ulint error = DB_SUCCESS;
-
- for (i = 0; i < ib_vector_size(tables); ++i) {
+ for (ulint i = 0; i < ib_vector_size(tables); ++i) {
dict_table_t* table;
- fts_sys_table_t* sys_table;
- ibool drop = FALSE;
+ fts_aux_table_t* aux_table;
+ bool drop = false;
- sys_table = static_cast<fts_sys_table_t*>(
+ aux_table = static_cast<fts_aux_table_t*>(
ib_vector_get(tables, i));
- table = dict_table_open_on_id(sys_table->parent_id, FALSE);
+ table = dict_table_open_on_id(
+ aux_table->parent_id, TRUE, FALSE);
if (table == NULL || table->fts == NULL) {
- drop = TRUE;
+ drop = true;
- } else if (sys_table->index_id != 0) {
- ulint j;
+ } else if (aux_table->index_id != 0) {
index_id_t id;
- fts_t* fts;
+ fts_t* fts;
- drop = TRUE;
+ drop = true;
fts = table->fts;
- id = sys_table->index_id;
+ id = aux_table->index_id;
/* Search for the FT index in the table's list. */
- for (j = 0; j < ib_vector_size(fts->indexes); ++j) {
+ for (ulint j = 0;
+ j < ib_vector_size(fts->indexes);
+ ++j) {
+
const dict_index_t* index;
index = static_cast<const dict_index_t*>(
@@ -5791,28 +5820,36 @@ fts_check_and_drop_orphaned_tables(
if (index->id == id) {
- drop = FALSE;
+ drop = false;
break;
}
}
}
if (table) {
- dict_table_close(table, FALSE);
+ dict_table_close(table, TRUE, FALSE);
}
if (drop) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: Parent table of "
- "FT auxiliary table %s not found.\n",
- sys_table->name);
- /* We ignore drop errors. */
- fts_drop_table(trx, sys_table->name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Parent table of FTS auxiliary table %s not "
+ "found.", aux_table->name);
+
+ dberr_t err = fts_drop_table(trx, aux_table->name);
+
+ if (err == DB_FAIL) {
+ char* path;
+
+ path = fil_make_ibd_name(
+ aux_table->name, false);
+
+ os_file_delete_if_exists(path);
+
+ mem_free(path);
+ }
}
}
-
- return(error);
}
/**********************************************************************//**
@@ -5823,19 +5860,62 @@ void
fts_drop_orphaned_tables(void)
/*==========================*/
{
- trx_t* trx;
- pars_info_t* info;
- mem_heap_t* heap;
- que_t* graph;
- ib_vector_t* tables;
- ib_alloc_t* heap_alloc;
- ulint error = DB_SUCCESS;
+ trx_t* trx;
+ pars_info_t* info;
+ mem_heap_t* heap;
+ que_t* graph;
+ ib_vector_t* tables;
+ ib_alloc_t* heap_alloc;
+ space_name_list_t space_name_list;
+ dberr_t error = DB_SUCCESS;
+
+ /* Note: We have to free the memory after we are done with the list. */
+ error = fil_get_space_names(space_name_list);
+
+ if (error == DB_OUT_OF_MEMORY) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
+ ut_error;
+ }
heap = mem_heap_create(1024);
heap_alloc = ib_heap_allocator_create(heap);
/* We store the table ids of all the FTS indexes that were found. */
- tables = ib_vector_create(heap_alloc, sizeof(fts_sys_table_t), 128);
+ tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
+
+ /* Get the list of all known .ibd files and check for orphaned
+ FTS auxiliary files in that list. We need to remove them because
+ users can't map them back to table names and this will create
+ unnecessary clutter. */
+
+ for (space_name_list_t::iterator it = space_name_list.begin();
+ it != space_name_list.end();
+ ++it) {
+
+ fts_aux_table_t* fts_aux_table;
+
+ fts_aux_table = static_cast<fts_aux_table_t*>(
+ ib_vector_push(tables, NULL));
+
+ memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
+
+ if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
+ ib_vector_pop(tables);
+ } else {
+ ulint len = strlen(*it);
+
+ fts_aux_table->id = fil_get_space_id_for_table(*it);
+
+ /* We got this list from fil0fil.cc. The tablespace
+ with this name must exist. */
+ ut_a(fts_aux_table->id != ULINT_UNDEFINED);
+
+ fts_aux_table->name = static_cast<char*>(
+ mem_heap_dup(heap, *it, len + 1));
+
+ fts_aux_table->name[len] = 0;
+ }
+ }
trx = trx_allocate_for_background();
trx->op_info = "dropping orphaned FTS tables";
@@ -5867,10 +5947,7 @@ fts_drop_orphaned_tables(void)
error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
- error = fts_check_and_drop_orphaned_tables(trx, tables);
- }
-
- if (error == DB_SUCCESS) {
+ fts_check_and_drop_orphaned_tables(trx, tables);
fts_sql_commit(trx);
break; /* Exit the loop. */
} else {
@@ -5881,15 +5958,15 @@ fts_drop_orphaned_tables(void)
ut_print_timestamp(stderr);
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading SYS_TABLES. "
- "Retrying!\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "lock wait timeout reading SYS_TABLES. "
+ "Retrying!");
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading SYS_TABLES.\n",
- error);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "(%s) while reading SYS_TABLES.",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -5905,6 +5982,14 @@ fts_drop_orphaned_tables(void)
if (heap != NULL) {
mem_heap_free(heap);
}
+
+ /** Free the memory allocated to store the .ibd names. */
+ for (space_name_list_t::iterator it = space_name_list.begin();
+ it != space_name_list.end();
+ ++it) {
+
+ delete[] *it;
+ }
}
/**********************************************************************//**
@@ -5986,7 +6071,7 @@ fts_load_stopword(
{
fts_table_t fts_table;
fts_string_t str;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint use_stopword;
fts_cache_t* cache;
const char* stopword_to_use = NULL;
@@ -6086,6 +6171,43 @@ cleanup:
/**********************************************************************//**
Callback function when we initialize the FTS at the start up
+time. It recovers the maximum Doc ID present in the current table.
+@return: always returns TRUE */
+static
+ibool
+fts_init_get_doc_id(
+/*================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts cache */
+{
+ doc_id_t doc_id = FTS_NULL_DOC_ID;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ que_node_t* exp = node->select_list;
+ fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
+
+ ut_ad(ib_vector_is_empty(cache->get_docs));
+
+ /* Copy each indexed column content into doc->text.f_str */
+ if (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ dtype_t* type = dfield_get_type(dfield);
+ void* data = dfield_get_data(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(
+ static_cast<const byte*>(data)));
+
+ if (doc_id >= cache->next_doc_id) {
+ cache->next_doc_id = doc_id + 1;
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Callback function when we initialize the FTS at the start up
time. It recovers Doc IDs that have not sync-ed to the auxiliary
table, and require to bring them back into FTS index.
@return: always returns TRUE */
@@ -6100,22 +6222,16 @@ fts_init_recover_doc(
fts_doc_t doc;
ulint doc_len = 0;
ulint field_no = 0;
- ibool has_fts = TRUE;
- fts_get_doc_t* get_doc = NULL;
+ fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
doc_id_t doc_id = FTS_NULL_DOC_ID;
sel_node_t* node = static_cast<sel_node_t*>(row);
que_node_t* exp = node->select_list;
- fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
+ fts_cache_t* cache = get_doc->cache;
- if (ib_vector_is_empty(cache->get_docs)) {
- has_fts = FALSE;
- } else {
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, 0));
+ fts_doc_init(&doc);
+ doc.found = TRUE;
- fts_doc_init(&doc);
- doc.found = TRUE;
- }
+ ut_ad(cache);
/* Copy each indexed column content into doc->text.f_str */
while (exp) {
@@ -6131,18 +6247,11 @@ fts_init_recover_doc(
doc_id = static_cast<doc_id_t>(mach_read_from_8(
static_cast<const byte*>(data)));
- /* Just need to fetch the Doc ID */
- if (!has_fts) {
- goto func_exit;
- }
-
field_no++;
exp = que_node_get_next(exp);
continue;
}
- ut_a(has_fts);
-
if (len == UNIV_SQL_NULL) {
exp = que_node_get_next(exp);
continue;
@@ -6196,7 +6305,6 @@ fts_init_recover_doc(
cache->added++;
-func_exit:
if (doc_id >= cache->next_doc_id) {
cache->next_doc_id = doc_id + 1;
}
@@ -6223,6 +6331,9 @@ fts_init_index(
fts_get_doc_t* get_doc = NULL;
ibool has_fts = TRUE;
fts_cache_t* cache = table->fts->cache;
+ bool need_init = false;
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
/* First check cache->get_docs is initialized */
if (!has_cache_lock) {
@@ -6239,6 +6350,8 @@ fts_init_index(
goto func_exit;
}
+ need_init = true;
+
start_doc = cache->synced_doc_id;
if (!start_doc) {
@@ -6250,28 +6363,32 @@ fts_init_index(
dropped, and we re-initialize the Doc ID system for subsequent
insertion */
if (ib_vector_is_empty(cache->get_docs)) {
- index = dict_table_get_first_index(table);
+ index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
+
+ ut_a(index);
+
has_fts = FALSE;
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_get_doc_id, cache);
} else {
- /* We only have one FTS index per table */
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, 0));
+ for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
- index = get_doc->index_cache->index;
- }
+ index = get_doc->index_cache->index;
- fts_doc_fetch_by_doc_id(NULL, start_doc, index,
- FTS_FETCH_DOC_BY_ID_LARGE,
- fts_init_recover_doc, cache);
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_recover_doc, get_doc);
+ }
+ }
if (has_fts) {
if (table->fts->cache->stopword_info.status
& STOPWORD_NOT_INIT) {
fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
}
-
- /* Register the table with the optimize thread. */
- fts_optimize_add_table(table);
}
table->fts->fts_status |= ADDED_TABLE_SYNCED;
@@ -6283,5 +6400,12 @@ func_exit:
rw_lock_x_unlock(&cache->lock);
}
+ if (need_init) {
+ mutex_enter(&dict_sys->mutex);
+ /* Register the table with the optimize thread. */
+ fts_optimize_add_table(table);
+ mutex_exit(&dict_sys->mutex);
+ }
+
return(TRUE);
}
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index 92e040d2715..9abeeccac91 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,26 +39,29 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0vlc.ic"
#endif
-/* The FTS optimize thread's work queue. */
+/** The FTS optimize thread's work queue. */
static ib_wqueue_t* fts_optimize_wq;
-/* The number of document ids to delete in one statement. */
+/** The number of document ids to delete in one statement. */
static const ulint FTS_MAX_DELETE_DOC_IDS = 1000;
-/* Time to wait for a message. */
+/** Time to wait for a message. */
static const ulint FTS_QUEUE_WAIT_IN_USECS = 5000000;
-/* Default optimize interval in secs. */
+/** Default optimize interval in secs. */
static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
+/** Server is shutting down, so the optimize thread must exit */
+static bool fts_opt_start_shutdown = false;
+
#if 0
-/* Check each table in round robin to see whether they'd
+/** Check each table in round robin to see whether they'd
need to be "optimized" */
static ulint fts_optimize_sync_iterator = 0;
#endif
/** State of a table within the optimization sub system. */
-enum fts_state_enum {
+enum fts_state_t {
FTS_STATE_LOADED,
FTS_STATE_RUNNING,
FTS_STATE_SUSPENDED,
@@ -67,7 +70,7 @@ enum fts_state_enum {
};
/** FTS optimize thread message types. */
-enum fts_msg_type_enum {
+enum fts_msg_type_t {
FTS_MSG_START, /*!< Start optimizing thread */
FTS_MSG_PAUSE, /*!< Pause optimizing thread */
@@ -83,21 +86,9 @@ enum fts_msg_type_enum {
threads work queue */
};
-typedef enum fts_state_enum fts_state_t;
-typedef struct fts_zip_struct fts_zip_t;
-typedef struct fts_msg_struct fts_msg_t;
-typedef struct fts_slot_struct fts_slot_t;
-typedef struct fts_encode_struct fts_encode_t;
-typedef enum fts_msg_type_enum fts_msg_type_t;
-typedef struct fts_msg_del_struct fts_msg_del_t;
-typedef struct fts_msg_stop_struct fts_msg_stop_t;
-typedef struct fts_optimize_struct fts_optimize_t;
-typedef struct fts_msg_optimize_struct fts_msg_optimize_t;
-typedef struct fts_optimize_graph_struct fts_optimize_graph_t;
-
/** Compressed list of words that have been read from FTS INDEX
that needs to be optimized. */
-struct fts_zip_struct {
+struct fts_zip_t {
ulint status; /*!< Status of (un)/zip operation */
ulint n_words; /*!< Number of words compressed */
@@ -128,7 +119,7 @@ struct fts_zip_struct {
};
/** Prepared statemets used during optimize */
-struct fts_optimize_graph_struct {
+struct fts_optimize_graph_t {
/*!< Delete a word from FTS INDEX */
que_t* delete_nodes_graph;
/*!< Insert a word into FTS INDEX */
@@ -140,7 +131,7 @@ struct fts_optimize_graph_struct {
};
/** Used by fts_optimize() to store state. */
-struct fts_optimize_struct {
+struct fts_optimize_t {
trx_t* trx; /*!< The transaction used for all SQL */
ib_alloc_t* self_heap; /*!< Heap to use for allocations */
@@ -183,14 +174,14 @@ struct fts_optimize_struct {
};
/** Used by the optimize, to keep state during compacting nodes. */
-struct fts_encode_struct {
+struct fts_encode_t {
doc_id_t src_last_doc_id;/*!< Last doc id read from src node */
byte* src_ilist_ptr; /*!< Current ptr within src ilist */
};
/** We use this information to determine when to start the optimize
cycle for a table. */
-struct fts_slot_struct {
+struct fts_slot_t {
dict_table_t* table; /*!< Table to optimize */
fts_state_t state; /*!< State of this slot */
@@ -210,7 +201,7 @@ struct fts_slot_struct {
};
/** A table remove message for the FTS optimize thread. */
-struct fts_msg_del_struct {
+struct fts_msg_del_t {
dict_table_t* table; /*!< The table to remove */
os_event_t event; /*!< Event to synchronize acknowledgement
@@ -219,12 +210,12 @@ struct fts_msg_del_struct {
};
/** Stop the optimize thread. */
-struct fts_msg_optimize_struct {
+struct fts_msg_optimize_t {
dict_table_t* table; /*!< Table to optimize */
};
/** The FTS optimize message work queue message type. */
-struct fts_msg_struct {
+struct fts_msg_t {
fts_msg_type_t type; /*!< Message type */
void* ptr; /*!< The message contents */
@@ -466,9 +457,9 @@ fts_optimize_index_fetch_node(
/**********************************************************************//**
Read the rows from the FTS inde.
-@return vector of rows fetched */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_index_fetch_nodes(
/*==================*/
trx_t* trx, /*!< in: transaction */
@@ -479,7 +470,7 @@ fts_index_fetch_nodes(
fts_fetch_t* fetch) /*!< in: fetch callback.*/
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
trx->op_info = "fetching FTS index nodes";
@@ -543,8 +534,9 @@ fts_index_fetch_nodes(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while reading FTS index.\n",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -781,8 +773,8 @@ fts_zip_deflate_end(
Read the words from the FTS INDEX.
@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes
to search else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_index_fetch_words(
/*==================*/
fts_optimize_t* optim, /*!< in: optimize scratch pad */
@@ -794,7 +786,7 @@ fts_index_fetch_words(
que_t* graph;
ulint selected;
fts_zip_t* zip = NULL;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
ibool inited = FALSE;
@@ -849,13 +841,14 @@ fts_index_fetch_words(
zip = optim->zip;
for(;;) {
+ int err;
- if (!inited && ((error = deflateInit(zip->zp, 9))
+ if (!inited && ((err = deflateInit(zip->zp, 9))
!= Z_OK)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: ZLib deflateInit() "
- "failed: %lu\n", error);
+ "failed: %d\n", err);
error = DB_ERROR;
break;
@@ -885,9 +878,9 @@ fts_index_fetch_words(
optim->trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while reading document.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -962,14 +955,14 @@ fts_fetch_doc_ids(
Read the rows from a FTS common auxiliary table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_table_fetch_doc_ids(
/*====================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: table */
fts_doc_ids_t* doc_ids) /*!< in: For collecting doc ids */
{
- ulint error;
+ dberr_t error;
que_t* graph;
pars_info_t* info = pars_info_create();
ibool alloc_bk_trx = FALSE;
@@ -1114,8 +1107,8 @@ fts_optimize_lookup(
/**********************************************************************//**
Encode the word pos list into the node
@return DB_SUCCESS or error code*/
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_encode_node(
/*=====================*/
fts_node_t* node, /*!< in: node to fill*/
@@ -1126,7 +1119,7 @@ fts_optimize_encode_node(
ulint enc_len;
ulint pos_enc_len;
doc_id_t doc_id_delta;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
byte* src = enc->src_ilist_ptr;
if (node->first_doc_id == 0) {
@@ -1202,8 +1195,8 @@ fts_optimize_encode_node(
/**********************************************************************//**
Optimize the data contained in a node.
@return DB_SUCCESS or error code*/
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_node(
/*==============*/
ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/
@@ -1213,7 +1206,7 @@ fts_optimize_node(
fts_encode_t* enc) /*!< in: encoding state */
{
ulint copied;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t doc_id = enc->src_last_doc_id;
if (!enc->src_ilist_ptr) {
@@ -1299,8 +1292,8 @@ test_again:
/**********************************************************************//**
Determine the starting pos within the deleted doc id vector for a word.
-@return DB_SUCCESS or error code */
-static
+@return delete position */
+static __attribute__((nonnull, warn_unused_result))
int
fts_optimize_deleted_pos(
/*=====================*/
@@ -1428,8 +1421,8 @@ fts_optimize_word(
/**********************************************************************//**
Update the FTS index table. This is a delete followed by an insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_write_word(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -1441,7 +1434,7 @@ fts_optimize_write_word(
pars_info_t* info;
que_t* graph;
ulint selected;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
char* table_name = fts_get_table_name(fts_table);
info = pars_info_create();
@@ -1470,8 +1463,9 @@ fts_optimize_write_word(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) during optimize, "
- "when deleting a word from the FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) during optimize, "
+ "when deleting a word from the FTS index.\n",
+ ut_strerr(error));
}
fts_que_graph_free(graph);
@@ -1491,9 +1485,10 @@ fts_optimize_write_word(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"during optimize, while adding a "
- "word to the FTS index.\n", error);
+ "word to the FTS index.\n",
+ ut_strerr(error));
}
}
@@ -1529,8 +1524,8 @@ fts_word_free(
/**********************************************************************//**
Optimize the word ilist and rewrite data to the FTS index.
@return status one of RESTART, EXIT, ERROR */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_compact(
/*=================*/
fts_optimize_t* optim, /*!< in: optimize state data */
@@ -1538,7 +1533,7 @@ fts_optimize_compact(
ib_time_t start_time) /*!< in: optimize start time */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint size = ib_vector_size(optim->words);
for (i = 0; i < size && error == DB_SUCCESS && !optim->done; ++i) {
@@ -1622,77 +1617,63 @@ fts_optimize_create(
/**********************************************************************//**
Get optimize start time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_get_index_start_time(
/*==============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t* start_time) /*!< out: time in secs */
{
- ulint error;
-
- error = fts_config_get_index_ulint(
- trx, index, FTS_OPTIMIZE_START_TIME, (ulint*) start_time);
-
- return(error);
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint*) start_time));
}
/**********************************************************************//**
Set the optimize start time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_set_index_start_time(
/*==============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t start_time) /*!< in: start time */
{
- ulint error;
-
- error = fts_config_set_index_ulint(
- trx, index, FTS_OPTIMIZE_START_TIME, (ulint) start_time);
-
- return(error);
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint) start_time));
}
/**********************************************************************//**
Get optimize end time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_get_index_end_time(
/*============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t* end_time) /*!< out: time in secs */
{
- ulint error;
-
- error = fts_config_get_index_ulint(
- trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time);
-
- return(error);
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time));
}
/**********************************************************************//**
Set the optimize end time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_set_index_end_time(
/*============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t end_time) /*!< in: end time */
{
- ulint error;
-
- error = fts_config_set_index_ulint(
- trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time);
-
- return(error);
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time));
}
#endif
@@ -1798,7 +1779,7 @@ fts_optimize_words(
fprintf(stderr, "%.*s\n", (int) word->f_len, word->f_str);
while(!optim->done) {
- ulint error;
+ dberr_t error;
trx_t* trx = optim->trx;
ulint selected;
@@ -1901,15 +1882,15 @@ fts_optimize_set_next_word(
Optimize is complete. Set the completion time, and reset the optimize
start string for this FTS index to "".
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index_completed(
/*=========================*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index) /*!< in: table with one FTS index */
{
fts_string_t word;
- ulint error;
+ dberr_t error;
byte buf[sizeof(ulint)];
#ifdef FTS_OPTIMIZE_DEBUG
ib_time_t end_time = ut_time();
@@ -1929,8 +1910,8 @@ fts_optimize_index_completed(
if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: Error: (%lu) while "
- "updating last optimized word!\n", error);
+ fprintf(stderr, "InnoDB: Error: (%s) while "
+ "updating last optimized word!\n", ut_strerr(error));
}
return(error);
@@ -1941,15 +1922,15 @@ fts_optimize_index_completed(
Read the list of words from the FTS auxiliary index that will be
optimized in this pass.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index_read_words(
/*==========================*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index, /*!< in: table with one FTS index */
fts_string_t* word) /*!< in: buffer to use */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
if (optim->del_list_regenerated) {
word->f_len = 0;
@@ -1998,15 +1979,15 @@ fts_optimize_index_read_words(
Run OPTIMIZE on the given FTS index. Note: this can take a very long
time (hours).
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index(
/*===============*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index) /*!< in: table with one FTS index */
{
fts_string_t word;
- ulint error;
+ dberr_t error;
byte str[FTS_MAX_WORD_LEN + 1];
/* Set the current index that we have to optimize. */
@@ -2069,8 +2050,8 @@ fts_optimize_index(
/**********************************************************************//**
Delete the document ids in the delete, and delete cache tables.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_deleted_doc_ids(
/*===============================*/
fts_optimize_t* optim) /*!< in: optimize instance */
@@ -2081,7 +2062,7 @@ fts_optimize_purge_deleted_doc_ids(
fts_update_t* update;
char* sql_str;
doc_id_t write_doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
info = pars_info_create();
@@ -2138,13 +2119,13 @@ fts_optimize_purge_deleted_doc_ids(
/**********************************************************************//**
Delete the document ids in the pending delete, and delete tables.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_deleted_doc_id_snapshot(
/*=======================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
que_t* graph;
char* sql_str;
@@ -2188,13 +2169,13 @@ Copy the deleted doc ids that will be purged during this optimize run
to the being deleted FTS auxiliary tables. The transaction is committed
upon successfull copy and rolled back on DB_DUPLICATE_KEY error.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_create_deleted_doc_id_snapshot(
/*========================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
que_t* graph;
char* sql_str;
@@ -2226,13 +2207,13 @@ fts_optimize_create_deleted_doc_id_snapshot(
Read in the document ids that are to be purged during optimize. The
transaction is committed upon successfully read.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_read_deleted_doc_id_snapshot(
/*======================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
optim->fts_common_table.suffix = "BEING_DELETED";
@@ -2263,14 +2244,14 @@ Optimze all the FTS indexes, skipping those that have already been
optimized, since the FTS auxiliary indexes are not guaranteed to be
of the same cardinality.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_indexes(
/*=================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_t* fts = optim->table->fts;
/* Optimize the FTS indexes. */
@@ -2333,13 +2314,13 @@ fts_optimize_indexes(
/*********************************************************************//**
Cleanup the snapshot tables and the master deleted table.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_snapshot(
/*========================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
/* Delete the doc ids from the master deleted tables, that were
in the snapshot that was taken at the start of optimize. */
@@ -2362,13 +2343,13 @@ fts_optimize_purge_snapshot(
/*********************************************************************//**
Reset the start time to 0 so that a new optimize can be started.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_reset_start_time(
/*==========================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
#ifdef FTS_OPTIMIZE_DEBUG
fts_t* fts = optim->table->fts;
@@ -2401,13 +2382,13 @@ fts_optimize_reset_start_time(
/*********************************************************************//**
Run OPTIMIZE on the given table by a background thread.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_table_bk(
/*==================*/
fts_slot_t* slot) /*!< in: table to optimiza */
{
- ulint error;
+ dberr_t error;
dict_table_t* table = slot->table;
fts_t* fts = table->fts;
@@ -2440,12 +2421,12 @@ fts_optimize_table_bk(
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_optimize_table(
/*===============*/
dict_table_t* table) /*!< in: table to optimiza */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_optimize_t* optim = NULL;
fts_t* fts = table->fts;
@@ -2567,6 +2548,11 @@ fts_optimize_add_table(
return;
}
+ /* Make sure table with FTS index cannot be evicted */
+ if (table->can_be_evicted) {
+ dict_table_move_from_lru_to_non_lru(table);
+ }
+
msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table);
ib_wqueue_add(fts_optimize_wq, msg, msg->heap);
@@ -2602,18 +2588,26 @@ fts_optimize_remove_table(
dict_table_t* table) /*!< in: table to remove */
{
fts_msg_t* msg;
- os_event_t event;
- fts_msg_del_t* remove;
+ os_event_t event;
+ fts_msg_del_t* remove;
/* if the optimize system not yet initialized, return */
if (!fts_optimize_wq) {
return;
}
+ /* FTS optimizer thread is already exited */
+ if (fts_opt_start_shutdown) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Try to remove table %s after FTS optimize"
+ " thread exiting.", table->name);
+ return;
+ }
+
msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL);
/* We will wait on this event until signalled by the consumer. */
- event = os_event_create(table->name);
+ event = os_event_create();
remove = static_cast<fts_msg_del_t*>(
mem_heap_alloc(msg->heap, sizeof(*remove)));
@@ -2889,6 +2883,8 @@ fts_optimize_thread(
ulint n_optimize = 0;
ib_wqueue_t* wq = (ib_wqueue_t*) arg;
+ ut_ad(!srv_read_only_mode);
+
heap = mem_heap_create(sizeof(dict_table_t*) * 64);
heap_alloc = ib_heap_allocator_create(heap);
@@ -3010,10 +3006,10 @@ fts_optimize_thread(
ib_vector_get(tables, i));
if (slot->state != FTS_STATE_EMPTY) {
- dict_table_t* table;
+ dict_table_t* table = NULL;
- table = dict_table_open_on_name_no_stats(
- slot->table->name, FALSE,
+ table = dict_table_open_on_name(
+ slot->table->name, FALSE, FALSE,
DICT_ERR_IGNORE_INDEX_ROOT);
if (table) {
@@ -3022,8 +3018,11 @@ fts_optimize_thread(
fts_sync_table(table);
}
- fts_free(table);
- dict_table_close(table, FALSE);
+ if (table->fts) {
+ fts_free(table);
+ }
+
+ dict_table_close(table, FALSE, FALSE);
}
}
}
@@ -3031,10 +3030,7 @@ fts_optimize_thread(
ib_vector_free(tables);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS optimize thread exiting.\n");
-
- ib_wqueue_free(wq);
+ ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting.");
os_event_set(exit_event);
@@ -3052,6 +3048,8 @@ void
fts_optimize_init(void)
/*===================*/
{
+ ut_ad(!srv_read_only_mode);
+
/* For now we only support one optimize thread. */
ut_a(fts_optimize_wq == NULL);
@@ -3074,18 +3072,30 @@ fts_optimize_is_init(void)
/**********************************************************************//**
Signal the optimize thread to prepare for shutdown. */
-
+UNIV_INTERN
void
fts_optimize_start_shutdown(void)
/*=============================*/
{
+ ut_ad(!srv_read_only_mode);
+
fts_msg_t* msg;
os_event_t event;
+ /* If there is an ongoing activity on dictionary, such as
+ srv_master_evict_from_table_cache(), wait for it */
+ dict_mutex_enter_for_mysql();
+
+	/* Tell the FTS optimizer system that we are exiting from
+	the optimizer thread; messages sent hereafter will not be
+	processed */
+ fts_opt_start_shutdown = true;
+ dict_mutex_exit_for_mysql();
+
/* We tell the OPTIMIZE thread to switch to state done, we
can't delete the work queue here because the add thread needs
deregister the FTS tables. */
- event = os_event_create(NULL);
+ event = os_event_create();
msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);
msg->ptr = event;
@@ -3094,15 +3104,20 @@ fts_optimize_start_shutdown(void)
os_event_wait(event);
os_event_free(event);
+
+ ib_wqueue_free(fts_optimize_wq);
+
}
/**********************************************************************//**
Reset the work queue. */
-
+UNIV_INTERN
void
fts_optimize_end(void)
/*==================*/
{
+ ut_ad(!srv_read_only_mode);
+
// FIXME: Potential race condition here: We should wait for
// the optimize thread to confirm shutdown.
fts_optimize_wq = NULL;
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
index 4fdfff5ca42..dd2984b1beb 100644
--- a/storage/innobase/fts/fts0pars.cc
+++ b/storage/innobase/fts/fts0pars.cc
@@ -105,7 +105,7 @@ extern int ftserror(const char* p);
typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner);
typedef int (*fts_scanner)();
-struct fts_lexer_struct {
+struct fts_lexer_t {
fts_scanner scanner;
void* yyscanner;
};
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 58b429a8406..5c757b4f176 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,6 +24,7 @@ Created 2007/03/27 Sunny Bains
Completed 2011/7/10 Sunny and Jimmy Yang
*******************************************************/
+#include "dict0dict.h" /* dict_table_get_n_rows() */
#include "ut0rbt.h"
#include "row0sel.h"
#include "fts0fts.h"
@@ -57,15 +58,10 @@ static const double FTS_NORMALIZE_COEFF = 0.0115F;
/* For parsing the search phrase */
static const char* FTS_PHRASE_DELIMITER = "\t ";
-typedef struct fts_match_struct fts_match_t;
-typedef struct fts_query_struct fts_query_t;
-typedef struct fts_phrase_struct fts_phrase_t;
-typedef struct fts_select_struct fts_select_t;
-typedef struct fts_doc_freq_struct fts_doc_freq_t;
-typedef struct fts_word_freq_struct fts_word_freq_t;
+struct fts_word_freq_t;
/** State of an FTS query. */
-struct fts_query_struct {
+struct fts_query_t {
mem_heap_t* heap; /*!< Heap to use for allocations */
trx_t* trx; /*!< The query transaction */
@@ -126,11 +122,11 @@ struct fts_query_struct {
position info for each matched word
in the word list */
- ulint total_docs; /*!< The total number of documents */
+ ib_uint64_t total_docs; /*!< The total number of documents */
ulint total_words; /*!< The total number of words */
- ulint error; /*!< Error code if any, that is
+ dberr_t error; /*!< Error code if any, that is
encountered during query processing */
ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per
@@ -144,7 +140,7 @@ struct fts_query_struct {
/** For phrase matching, first we collect the documents and the positions
then we match. */
-struct fts_match_struct {
+struct fts_match_t {
doc_id_t doc_id; /*!< Document id */
ulint start; /*!< Start the phrase match from
@@ -158,7 +154,7 @@ struct fts_match_struct {
/** For matching tokens in a phrase search. We use this data structure in
the callback that determines whether a document should be accepted or
rejected for a phrase search. */
-struct fts_select_struct {
+struct fts_select_t {
doc_id_t doc_id; /*!< The document id to match */
ulint min_pos; /*!< For found to be TRUE at least
@@ -173,8 +169,23 @@ struct fts_select_struct {
the FTS index */
};
+/** structure defines a set of ranges for original documents, each of which
+has a minimum position and maximum position. Text in such range should
+contain all words in the proximity search. We will need to count the
+words in such range to make sure it is less than the specified distance
+of the proximity search */
+struct fts_proximity_t {
+ ulint n_pos; /*!< number of position set, defines
+ a range (min to max) containing all
+ matching words */
+ ulint* min_pos; /*!< the minimum position (in bytes)
+ of the range */
+ ulint* max_pos; /*!< the maximum position (in bytes)
+ of the range */
+};
+
/** The match positions and tokesn to match */
-struct fts_phrase_struct {
+struct fts_phrase_t {
ibool found; /*!< Match result */
const fts_match_t*
@@ -188,23 +199,26 @@ struct fts_phrase_struct {
CHARSET_INFO* charset; /*!< Phrase match charset */
mem_heap_t* heap; /*!< Heap for word processing */
ulint zip_size; /*!< row zip size */
+ fts_proximity_t*proximity_pos; /*!< position info for proximity
+ search verification. Records the min
+ and max position of words matched */
};
/** For storing the frequncy of a word/term in a document */
-struct fts_doc_freq_struct {
+struct fts_doc_freq_t {
doc_id_t doc_id; /*!< Document id */
ulint freq; /*!< Frequency of a word in a document */
};
/** To determine the word frequency per document. */
-struct fts_word_freq_struct {
+struct fts_word_freq_t {
byte* word; /*!< Word for which we need the freq,
it's allocated on the query heap */
ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document
word frequencies. The elements are
of type fts_doc_freq_t */
- ulint doc_count; /*!< Total number of documents that
+ ib_uint64_t doc_count; /*!< Total number of documents that
contain this word */
double idf; /*!< Inverse document frequency */
};
@@ -257,37 +271,46 @@ search arguments to search the document again, thus "expand"
the search result set.
@return DB_SUCCESS if success, otherwise the error code */
static
-ulint
+dberr_t
fts_expand_query(
/*=============*/
dict_index_t* index, /*!< in: FTS index to search */
- fts_query_t* query); /*!< in: query result, to be freed
+ fts_query_t* query) /*!< in: query result, to be freed
by the client */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
This function finds documents that contain all words in a
phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
This function is called for phrase and proximity search.
@return TRUE if documents are found, FALSE if otherwise */
static
ibool
-fts_check_phrase_proximity(
-/*=======================*/
- fts_query_t* query, /*!< in: query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
ib_vector_t* tokens); /*!< in: Tokens contain words */
/*************************************************************//**
-This function check the words in result document are close to each
-other enough (within proximity rnage). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
static
-ulint
-fts_proximity_check_position(
-/*=========================*/
- fts_match_t** match, /*!< in: query instance */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance); /*!< in: distance value
- for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+	fts_match_t**	match,		/*!< in: matching positions */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos); /*!< out: the position info
+ records ranges containing
+ all matching words. */
#if 0
/********************************************************************
Get the total number of words in a documents. */
@@ -954,8 +977,8 @@ cont_search:
/*****************************************************************//**
Set difference.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_difference(
/*=================*/
fts_query_t* query, /*!< in: query instance */
@@ -993,15 +1016,21 @@ fts_query_difference(
ut_a(index_cache != NULL);
/* Search the cache for a matching word first. */
- nodes = fts_cache_find_word(index_cache, token);
+ if (query->cur_node->term.wildcard
+ && query->flags != FTS_PROXIMITY
+ && query->flags != FTS_PHRASE) {
+ fts_cache_find_wildcard(query, index_cache, token);
+ } else {
+ nodes = fts_cache_find_word(index_cache, token);
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
- const fts_node_t* node;
+ for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ const fts_node_t* node;
- node = static_cast<const fts_node_t*>(
- ib_vector_get_const(nodes, i));
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
- fts_query_check_node(query, token, node);
+ fts_query_check_node(query, token, node);
+ }
}
rw_lock_x_unlock(&cache->lock);
@@ -1026,8 +1055,8 @@ fts_query_difference(
/*****************************************************************//**
Intersect the token doc ids with the current set.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_intersect(
/*================*/
fts_query_t* query, /*!< in: query instance */
@@ -1216,8 +1245,8 @@ fts_query_cache(
/*****************************************************************//**
Set union.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_union(
/*============*/
fts_query_t* query, /*!< in: query instance */
@@ -1248,13 +1277,7 @@ fts_query_union(
/* Single '%' would confuse parser in pars_like_rebind(). In addition,
our wildcard search only supports prefix search */
- if (*token->f_str == '%') {
- if (token->f_len == 1) {
- return(query->error);
- }
- token->f_str++;
- token->f_len--;
- }
+ ut_ad(*token->f_str != '%');
fts_query_cache(query, token);
@@ -1485,6 +1508,67 @@ fts_query_match_phrase_terms(
}
/*****************************************************************//**
+Callback function to count the number of words in position ranges,
+and see whether the word count is in specified "phrase->distance"
+@return true if the number of characters is less than the "distance" */
+static
+bool
+fts_proximity_is_word_in_range(
+/*===========================*/
+ const fts_phrase_t*
+ phrase, /*!< in: phrase with the search info */
+ byte* start, /*!< in: text to search */
+ ulint total_len) /*!< in: length of text */
+{
+ fts_proximity_t* proximity_pos = phrase->proximity_pos;
+
+ /* Search each matched position pair (with min and max positions)
+ and count the number of words in the range */
+ for (ulint i = 0; i < proximity_pos->n_pos; i++) {
+ ulint cur_pos = proximity_pos->min_pos[i];
+ ulint n_word = 0;
+
+ ut_ad(proximity_pos->max_pos[i] <= total_len);
+
+ /* Walk through words in the range and count them */
+ while (cur_pos <= proximity_pos->max_pos[i]) {
+ ulint len;
+ fts_string_t str;
+ ulint offset = 0;
+
+ len = innobase_mysql_fts_get_token(
+ phrase->charset,
+ start + cur_pos,
+ start + total_len, &str, &offset);
+
+ if (len == 0) {
+ break;
+ }
+
+ /* Advances position with "len" bytes */
+ cur_pos += len;
+
+ /* Record the number of words */
+ if (str.f_n_char > 0) {
+ n_word++;
+ }
+
+ if (n_word > phrase->distance) {
+ break;
+ }
+ }
+
+ /* Check if the number of words is less than specified
+ "distance" */
+ if (n_word && n_word <= phrase->distance) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/*****************************************************************//**
Callback function to fetch and search the document.
@return TRUE if matched else FALSE */
static
@@ -1594,31 +1678,77 @@ fts_query_fetch_document(
sel_node_t* node = static_cast<sel_node_t*>(row);
fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg);
ulint prev_len = 0;
+ ulint total_len = 0;
+ byte* document_text = NULL;
exp = node->select_list;
phrase->found = FALSE;
+ /* For proximity search, we will need to get the whole document
+ from all fields, so first count the total length of the document
+ from all the fields */
+ if (phrase->proximity_pos) {
+ while (exp) {
+ ulint field_len;
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+
+ if (dfield_is_ext(dfield)) {
+ ulint local_len = dfield_get_len(dfield);
+
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+ field_len = mach_read_from_4(
+ data + local_len + BTR_EXTERN_LEN + 4);
+ } else {
+ field_len = dfield_get_len(dfield);
+ }
+
+ if (field_len != UNIV_SQL_NULL) {
+ total_len += field_len + 1;
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ document_text = static_cast<byte*>(mem_heap_zalloc(
+ phrase->heap, total_len));
+
+ if (!document_text) {
+ return(FALSE);
+ }
+ }
+
+ exp = node->select_list;
+
while (exp) {
dfield_t* dfield = que_node_get_val(exp);
- void* data = NULL;
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
ulint cur_len;
if (dfield_is_ext(dfield)) {
data = btr_copy_externally_stored_field(
- &cur_len, static_cast<const byte*>(data),
- phrase->zip_size,
+ &cur_len, data, phrase->zip_size,
dfield_get_len(dfield), phrase->heap);
} else {
- data = dfield_get_data(dfield);
cur_len = dfield_get_len(dfield);
}
if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
- phrase->found =
- fts_query_match_phrase(
- phrase, static_cast<byte*>(data),
- cur_len, prev_len, phrase->heap);
+ if (phrase->proximity_pos) {
+ memcpy(document_text + prev_len, data, cur_len);
+ } else {
+ /* For phrase search */
+ phrase->found =
+ fts_query_match_phrase(
+ phrase,
+ static_cast<byte*>(data),
+ cur_len, prev_len,
+ phrase->heap);
+ }
}
if (phrase->found) {
@@ -1633,6 +1763,13 @@ fts_query_fetch_document(
exp = que_node_get_next(exp);
}
+ if (phrase->proximity_pos) {
+ ut_ad(prev_len <= total_len);
+
+ phrase->found = fts_proximity_is_word_in_range(
+ phrase, document_text, total_len);
+ }
+
return(phrase->found);
}
@@ -1689,13 +1826,12 @@ fts_query_select(
/********************************************************************
Read the rows from the FTS index, that match word and where the
-doc id is between first and last doc id. */
-static
-ulint
+doc id is between first and last doc id.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_find_term(
/*================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
que_t** graph, /*!< in: prepared statement */
const fts_string_t* word, /*!< in: the word to fetch */
@@ -1705,7 +1841,7 @@ fts_query_find_term(
ibool* found) /*!< out: TRUE if found else FALSE */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
fts_select_t select;
doc_id_t match_doc_id;
trx_t* trx = query->trx;
@@ -1830,19 +1966,18 @@ fts_query_sum(
}
/********************************************************************
-Calculate the total documents that contain a particular word (term). */
-static
-ulint
+Calculate the total documents that contain a particular word (term).
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_total_docs_containing_term(
/*=================================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
const fts_string_t* word, /*!< in: the word to check */
ulint* total) /*!< out: documents containing word */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
que_t* graph;
ulint selected;
trx_t* trx = query->trx;
@@ -1910,19 +2045,18 @@ fts_query_total_docs_containing_term(
}
/********************************************************************
-Get the total number of words in a documents. */
-static
-ulint
+Get the total number of words in a documents.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_terms_in_document(
/*========================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
doc_id_t doc_id, /*!< in: the word to check */
ulint* total) /*!< out: total words in document */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
que_t* graph;
doc_id_t read_doc_id;
trx_t* trx = query->trx;
@@ -1993,9 +2127,9 @@ fts_query_terms_in_document(
/*****************************************************************//**
Retrieve the document and match the phrase tokens.
-@return TRUE if matches else FALSE */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_match_document(
/*=====================*/
ib_vector_t* tokens, /*!< in: phrase tokens */
@@ -2004,7 +2138,7 @@ fts_query_match_document(
ulint distance, /*!< in: proximity distance */
ibool* found) /*!< out: TRUE if phrase found */
{
- ulint error;
+ dberr_t error;
fts_phrase_t phrase;
memset(&phrase, 0x0, sizeof(phrase));
@@ -2025,8 +2159,8 @@ fts_query_match_document(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%lu) matching document.\n",
- error);
+ fprintf(stderr, "InnoDB: Error: (%s) matching document.\n",
+ ut_strerr(error));
} else {
*found = phrase.found;
}
@@ -2037,11 +2171,66 @@ fts_query_match_document(
}
/*****************************************************************//**
+This function fetches the original documents and counts the words
+between matching words to check that they are within the specified distance
+@return true if no error and the words are within the proximity range */
+static __attribute__((nonnull, warn_unused_result))
+bool
+fts_query_is_in_proximity_range(
+/*============================*/
+ const fts_query_t* query, /*!< in: query instance */
+	fts_match_t**		match,		/*!< in: matching positions */
+ fts_proximity_t* qualified_pos) /*!< in: position info for
+ qualified ranges */
+{
+ fts_get_doc_t get_doc;
+ fts_cache_t* cache = query->index->table->fts->cache;
+ dberr_t err;
+ fts_phrase_t phrase;
+
+ memset(&get_doc, 0x0, sizeof(get_doc));
+ memset(&phrase, 0x0, sizeof(phrase));
+
+ rw_lock_x_lock(&cache->lock);
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
+ rw_lock_x_unlock(&cache->lock);
+ ut_a(get_doc.index_cache != NULL);
+
+ phrase.distance = query->distance;
+ phrase.charset = get_doc.index_cache->charset;
+ phrase.zip_size = dict_table_zip_size(
+ get_doc.index_cache->index->table);
+ phrase.heap = mem_heap_create(512);
+ phrase.proximity_pos = qualified_pos;
+ phrase.found = FALSE;
+
+ err = fts_doc_fetch_by_doc_id(
+ &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
+ fts_query_fetch_document, &phrase);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error: (%s) in verification phase of proximity "
+ "search", ut_strerr(err));
+ }
+
+ /* Free the prepared statement. */
+ if (get_doc.get_document_graph) {
+ fts_que_graph_free(get_doc.get_document_graph);
+ get_doc.get_document_graph = NULL;
+ }
+
+ mem_heap_free(phrase.heap);
+
+ return(err == DB_SUCCESS && phrase.found);
+}
+
+/*****************************************************************//**
Iterate over the matched document ids and search the for the
actual phrase in the text.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_search_phrase(
/*====================*/
fts_query_t* query, /*!< in: query instance */
@@ -2050,8 +2239,6 @@ fts_query_search_phrase(
ulint i;
fts_get_doc_t get_doc;
ulint n_matched;
- // FIXME: Debug code
- ulint searched = 0;
fts_cache_t* cache = query->index->table->fts->cache;
n_matched = ib_vector_size(query->matched);
@@ -2061,9 +2248,7 @@ fts_query_search_phrase(
rw_lock_x_lock(&cache->lock);
- // FIXME: We shouldn't have to cast here.
- get_doc.index_cache = (fts_index_cache_t*)
- fts_find_index_cache(cache, query->index);
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
/* Must find the index cache */
ut_a(get_doc.index_cache != NULL);
@@ -2089,9 +2274,6 @@ fts_query_search_phrase(
an earlier pass. */
if (match->doc_id != 0) {
- // FIXME: Debug code
- ++searched;
-
query->error = fts_query_match_document(
tokens, &get_doc,
match, query->distance, &found);
@@ -2119,18 +2301,14 @@ fts_query_search_phrase(
get_doc.get_document_graph = NULL;
}
- // FIXME: Debug code
- ut_print_timestamp(stderr);
- printf(" End: %lu, %lu\n", searched, ib_vector_size(query->matched));
-
return(query->error);
}
/*****************************************************************//**
Text/Phrase search.
-@return count of doc ids added */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_phrase_search(
/*====================*/
fts_query_t* query, /*!< in: query instance */
@@ -2290,7 +2468,7 @@ fts_query_phrase_search(
/* If we are doing proximity search, verify the distance
between all words, and check they are in specified distance. */
if (query->flags & FTS_PROXIMITY) {
- fts_check_phrase_proximity(query, tokens);
+ fts_phrase_or_proximity_search(query, tokens);
} else {
ibool matched;
@@ -2301,7 +2479,7 @@ fts_query_phrase_search(
and then doing a search through the text. Isolated
testing shows this also helps in mitigating disruption
of the buffer cache. */
- matched = fts_check_phrase_proximity(query, tokens);
+ matched = fts_phrase_or_proximity_search(query, tokens);
query->matched = query->match_array[0];
/* Read the actual text in and search for the phrase. */
@@ -2329,8 +2507,8 @@ func_exit:
/*****************************************************************//**
Find the word and evaluate.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_execute(
/*==============*/
fts_query_t* query, /*!< in: query instance */
@@ -2477,13 +2655,12 @@ fts_query_visitor(
/*****************************************************************//**
Process (nested) sub-expression, create a new result set to store the
sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list. */
-
-ulint
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit_sub_exp(
/*==================*/
- /*!< out: DB_SUCCESS if all
- went well */
fts_ast_node_t* node, /*!< in,out: current root node */
fts_ast_callback visitor, /*!< in: callback function */
void* arg) /*!< in,out: arg for callback */
@@ -2492,8 +2669,9 @@ fts_ast_visit_sub_exp(
fts_query_t* query = static_cast<fts_query_t*>(arg);
ib_rbt_t* parent_doc_ids;
ib_rbt_t* subexpr_doc_ids;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool inited = query->inited;
+ bool will_be_ignored = false;
ut_a(node->type == FTS_AST_SUBEXP_LIST);
@@ -2521,7 +2699,8 @@ fts_ast_visit_sub_exp(
/* Process nodes in current sub-expression and store its
result set in query->doc_ids we created above. */
- error = fts_ast_visit(FTS_NONE, node->next, visitor, arg);
+ error = fts_ast_visit(FTS_NONE, node->next, visitor,
+ arg, &will_be_ignored);
/* Reinstate parent node state and prepare for merge. */
query->inited = inited;
@@ -2757,6 +2936,8 @@ fts_query_read_node(
ut_a(query->cur_node->type == FTS_AST_TERM ||
query->cur_node->type == FTS_AST_TEXT);
+ memset(&node, 0, sizeof(node));
+
/* Need to consider the wildcard search case, the word frequency
is created on the search string not the actual word. So we need
to assign the frequency on search string behalf. */
@@ -2879,8 +3060,8 @@ fts_query_calculate_idf(
/*====================*/
fts_query_t* query) /*!< in: Query state */
{
- const ib_rbt_node_t* node;
- double total_docs = query->total_docs;
+ const ib_rbt_node_t* node;
+ ib_uint64_t total_docs = query->total_docs;
/* We need to free any instances of fts_doc_freq_t that we
may have allocated. */
@@ -2893,7 +3074,7 @@ fts_query_calculate_idf(
word_freq = rbt_value(fts_word_freq_t, node);
if (word_freq->doc_count > 0) {
- if (total_docs == (double) word_freq->doc_count) {
+ if (total_docs == word_freq->doc_count) {
/* QP assume ranking > 0 if we find
a match. Since Log10(1) = 0, we cannot
make IDF a zero value if do find a
@@ -2907,10 +3088,13 @@ fts_query_calculate_idf(
}
}
- fprintf(stderr,"'%s' -> %lu/%lu %6.5lf\n",
- word_freq->word,
- query->total_docs, word_freq->doc_count,
- word_freq->idf);
+ if (fts_enable_diag_print) {
+ fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF
+ " %6.5lf\n",
+ word_freq->word,
+ query->total_docs, word_freq->doc_count,
+ word_freq->idf);
+ }
}
}
@@ -3017,7 +3201,7 @@ fts_retrieve_ranking(
ranking = rbt_value(fts_ranking_t, parent.last);
- return (ranking->rank);
+ return(ranking->rank);
}
return(0);
@@ -3184,7 +3368,7 @@ fts_query_parse(
FTS Query entry point.
@return DB_SUCCESS if successful otherwise error code */
UNIV_INTERN
-ulint
+dberr_t
fts_query(
/*======*/
trx_t* trx, /*!< in: transaction */
@@ -3196,7 +3380,7 @@ fts_query(
fts_result_t** result) /*!< in/out: result doc ids */
{
fts_query_t query;
- ulint error;
+ dberr_t error = DB_SUCCESS;
byte* lc_query_str;
ulint lc_query_str_len;
ulint result_len;
@@ -3204,6 +3388,7 @@ fts_query(
trx_t* query_trx;
CHARSET_INFO* charset;
ulint start_time_ms;
+ bool will_be_ignored = false;
boolean_mode = flags & FTS_BOOL;
@@ -3237,20 +3422,24 @@ fts_query(
/* Setup the RB tree that will be used to collect per term
statistics. */
query.word_freqs = rbt_create_arg_cmp(
- sizeof(fts_word_freq_t), innobase_fts_string_cmp, charset);
+ sizeof(fts_word_freq_t), innobase_fts_string_cmp,
+ (void*) charset);
- query.total_docs = fts_get_total_document_count(index->table);
+ query.total_docs = dict_table_get_n_rows(index->table);
- error = fts_get_total_word_count(trx, query.index, &query.total_words);
+#ifdef FTS_DOC_STATS_DEBUG
+ if (ft_enable_diag_print) {
+ error = fts_get_total_word_count(
+ trx, query.index, &query.total_words);
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
-#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "Total docs: %lu Total words: %lu\n",
- query.total_docs, query.total_words);
-#endif
+ fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n",
+ query.total_docs, query.total_words);
+ }
+#endif /* FTS_DOC_STATS_DEBUG */
query.fts_common_table.suffix = "DELETED";
@@ -3299,13 +3488,14 @@ fts_query(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
/* Parse the input query string. */
- if (fts_query_parse(&query, lc_query_str, query_len)) {
+ if (fts_query_parse(&query, lc_query_str, result_len)) {
fts_ast_node_t* ast = query.root;
/* Traverse the Abstract Syntax Tree (AST) and execute
the query. */
query.error = fts_ast_visit(
- FTS_NONE, ast, fts_query_visitor, &query);
+ FTS_NONE, ast, fts_query_visitor,
+ &query, &will_be_ignored);
/* If query expansion is requested, extend the search
with first search pass result */
@@ -3453,8 +3643,8 @@ words in documents found in the first search pass will be used as
search arguments to search the document again, thus "expand"
the search result set.
@return DB_SUCCESS if success, otherwise the error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_expand_query(
/*=============*/
dict_index_t* index, /*!< in: FTS index to search */
@@ -3463,7 +3653,7 @@ fts_expand_query(
const ib_rbt_node_t* node;
const ib_rbt_node_t* token_node;
fts_doc_t result_doc;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
const fts_index_cache_t*index_cache;
/* If no doc is found in first search pass, return */
@@ -3482,7 +3672,7 @@ fts_expand_query(
result_doc.tokens = rbt_create_arg_cmp(
sizeof(fts_token_t), innobase_fts_text_cmp,
- index_cache->charset);
+ (void*) index_cache->charset);
result_doc.charset = index_cache->charset;
@@ -3557,14 +3747,16 @@ fts_expand_query(
/*************************************************************//**
This function finds documents that contain all words in a
phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
This function is called for phrase and proximity search.
@return TRUE if documents are found, FALSE if otherwise */
static
ibool
-fts_check_phrase_proximity(
-/*=======================*/
- fts_query_t* query, /*!< in: query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance.
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
ib_vector_t* tokens) /*!< in: Tokens contain words */
{
ulint n_matched;
@@ -3581,8 +3773,13 @@ fts_check_phrase_proximity(
walk through the list and find common documents that
contain all the matching words. */
for (i = 0; i < n_matched; i++) {
- ulint j;
- ulint k = 0;
+ ulint j;
+ ulint k = 0;
+ fts_proximity_t qualified_pos;
+ ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
+
+ qualified_pos.min_pos = &qualified_pos_buf[0];
+ qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
match[0] = static_cast<fts_match_t*>(
ib_vector_get(query->match_array[0], i));
@@ -3647,24 +3844,31 @@ fts_check_phrase_proximity(
/* For this matching doc, we need to further
verify whether the words in the doc are close
- to each other, and with in distance specified
+ to each other, and within the distance specified
in the proximity search */
if (query->flags & FTS_PHRASE) {
matched = TRUE;
- } else if (fts_proximity_check_position(
- match, num_token, query->distance)) {
- ulint z;
- /* If so, mark we find a matching doc */
- fts_query_process_doc_id(query, match[0]->doc_id, 0);
+ } else if (fts_proximity_get_positions(
+ match, num_token, ULINT_MAX, &qualified_pos)) {
+
+ /* Fetch the original documents and count the
+ words in between matching words to see that is in
+ specified distance */
+ if (fts_query_is_in_proximity_range(
+ query, match, &qualified_pos)) {
+ /* If so, mark we find a matching doc */
+ fts_query_process_doc_id(
+ query, match[0]->doc_id, 0);
- matched = TRUE;
- for (z = 0; z < num_token; z++) {
- fts_string_t* token;
- token = static_cast<fts_string_t*>(
- ib_vector_get(tokens, z));
- fts_query_add_word_to_document(
- query, match[0]->doc_id,
- token->f_str);
+ matched = TRUE;
+ for (ulint z = 0; z < num_token; z++) {
+ fts_string_t* token;
+ token = static_cast<fts_string_t*>(
+ ib_vector_get(tokens, z));
+ fts_query_add_word_to_document(
+ query, match[0]->doc_id,
+ token->f_str);
+ }
}
}
@@ -3678,24 +3882,32 @@ func_exit:
}
/*************************************************************//**
-This function check the words in result document are close to each
-other (within proximity range). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
static
-ulint
-fts_proximity_check_position(
-/*=========================*/
- fts_match_t** match, /*!< in: query instance */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance) /*!< in: distance value
- for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+ fts_match_t** match, /*!< in: query instance */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos) /*!< out: the position info
+ records ranges containing
+ all matching words. */
{
ulint i;
ulint idx[MAX_PROXIMITY_ITEM];
ulint num_pos[MAX_PROXIMITY_ITEM];
ulint min_idx;
+ qualified_pos->n_pos = 0;
+
ut_a(num_match < MAX_PROXIMITY_ITEM);
/* Each word could appear multiple times in a doc. So
@@ -3747,14 +3959,21 @@ fts_proximity_check_position(
find a good match */
if (max_pos - min_pos <= distance
&& (i >= num_match || position[i] != ULINT_UNDEFINED)) {
- return(TRUE);
- } else {
- /* Otherwise, move to the next position is the
- list for the word with the smallest position */
- idx[min_idx]++;
+ /* The charset has variable character
+ length encoding, record the min_pos and
+ max_pos, we will need to verify the actual
+ number of characters */
+ qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
+ qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+ qualified_pos->n_pos++;
}
+
+ /* Otherwise, move to the next position is the
+ list for the word with the smallest position */
+ idx[min_idx]++;
}
- /* Failed to find all words within the range for the doc */
- return(FALSE);
+ ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
+
+ return(qualified_pos->n_pos != 0);
}
diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc
index 8e60a5f1132..03c19d93af6 100644
--- a/storage/innobase/fts/fts0sql.cc
+++ b/storage/innobase/fts/fts0sql.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -262,7 +262,7 @@ fts_parse_sql_no_dict_lock(
Evaluate an SQL query graph.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_eval_sql(
/*=========*/
trx_t* trx, /*!< in: transaction */
@@ -327,16 +327,16 @@ fts_get_select_columns_str(
Commit a transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_commit(
/*===========*/
trx_t* trx) /*!< in: transaction */
{
- ulint error;
+ dberr_t error;
error = trx_commit_for_mysql(trx);
- /* Commit above returns 0 on success, it should always succeed */
+ /* Commit should always succeed */
ut_a(error == DB_SUCCESS);
return(DB_SUCCESS);
@@ -346,7 +346,7 @@ fts_sql_commit(
Rollback a transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_rollback(
/*=============*/
trx_t* trx) /*!< in: transaction */
diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc
index 69b859716d5..44434c4ea25 100644
--- a/storage/innobase/fts/fts0tlex.cc
+++ b/storage/innobase/fts/fts0tlex.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -368,10 +368,10 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[16] =
+static yyconst flex_int16_t yy_accept[17] =
{ 0,
- 4, 4, 7, 4, 1, 5, 1, 6, 2, 4,
- 1, 1, 0, 3, 0
+ 4, 4, 7, 4, 1, 5, 1, 6, 6, 2,
+ 4, 1, 1, 0, 3, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -379,8 +379,8 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 1, 1, 1, 1,
- 1, 6, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 1,
+ 1, 7, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -406,35 +406,35 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[7] =
+static yyconst flex_int32_t yy_meta[8] =
{ 0,
- 1, 2, 3, 4, 5, 1
+ 1, 2, 3, 4, 5, 5, 1
} ;
-static yyconst flex_int16_t yy_base[19] =
+static yyconst flex_int16_t yy_base[20] =
{ 0,
- 0, 0, 17, 0, 5, 20, 0, 8, 0, 0,
- 0, 0, 3, 20, 20, 9, 10, 14
+ 0, 0, 18, 0, 6, 21, 0, 9, 21, 0,
+ 0, 0, 0, 4, 21, 21, 10, 11, 15
} ;
-static yyconst flex_int16_t yy_def[19] =
+static yyconst flex_int16_t yy_def[20] =
{ 0,
- 15, 1, 15, 16, 16, 15, 17, 18, 16, 16,
- 5, 17, 18, 15, 0, 15, 15, 15
+ 16, 1, 16, 17, 17, 16, 18, 19, 16, 17,
+ 17, 5, 18, 19, 16, 0, 16, 16, 16
} ;
-static yyconst flex_int16_t yy_nxt[27] =
+static yyconst flex_int16_t yy_nxt[29] =
{ 0,
- 4, 5, 6, 7, 8, 9, 11, 14, 12, 10,
- 10, 12, 14, 12, 13, 13, 15, 13, 13, 3,
- 15, 15, 15, 15, 15, 15
+ 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
+ 11, 11, 13, 15, 13, 14, 14, 16, 14, 14,
+ 3, 16, 16, 16, 16, 16, 16, 16
} ;
-static yyconst flex_int16_t yy_chk[27] =
+static yyconst flex_int16_t yy_chk[29] =
{ 0,
- 1, 1, 1, 1, 1, 1, 5, 13, 5, 16,
- 16, 17, 8, 17, 18, 18, 3, 18, 18, 15,
- 15, 15, 15, 15, 15, 15
+ 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
+ 17, 17, 18, 8, 18, 19, 19, 3, 19, 19,
+ 16, 16, 16, 16, 16, 16, 16, 16
} ;
/* The intent behind this definition is that it'll catch
@@ -699,7 +699,7 @@ YY_DECL
register yy_state_type yy_current_state;
register char *yy_cp, *yy_bp;
register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
#line 44 "fts0tlex.l"
@@ -757,13 +757,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 15 );
+ while ( yy_current_state != 16 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -969,7 +969,7 @@ case YY_STATE_EOF(INITIAL):
*/
static int yy_get_next_buffer (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
register char *source = yyg->yytext_ptr;
register int number_to_move, i;
@@ -1035,9 +1035,9 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- fts0trealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
+ fts0trealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
}
else
/* Can't grow it, we don't own it. */
@@ -1086,7 +1086,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) fts0trealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0trealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -1106,7 +1106,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
register yy_state_type yy_current_state;
register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_current_state = yyg->yy_start;
@@ -1121,7 +1121,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1138,7 +1138,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
{
register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner; /* This var may be unused depending upon options. */
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
register char *yy_cp = yyg->yy_c_buf_p;
register YY_CHAR yy_c = 1;
@@ -1150,11 +1150,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 15);
+ yy_is_jam = (yy_current_state == 16);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -1168,7 +1168,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
int c;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
*yyg->yy_c_buf_p = yyg->yy_hold_char;
@@ -1226,7 +1226,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
}
}
- c = *(unsigned char*) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
*yyg->yy_c_buf_p = '\0'; /* preserve yytext */
yyg->yy_hold_char = *++yyg->yy_c_buf_p;
@@ -1241,7 +1241,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0trestart (FILE * input_file , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! YY_CURRENT_BUFFER ){
fts0tensure_buffer_stack (yyscanner);
@@ -1259,7 +1259,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* TODO. We should be able to replace this entire function body
* with
@@ -1291,7 +1291,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static void fts0t_load_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
@@ -1317,7 +1317,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) fts0talloc(b->yy_buf_size + 2 ,yyscanner );
+ b->yy_ch_buf = (char *) fts0talloc(b->yy_buf_size + 2 ,yyscanner );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" );
@@ -1334,7 +1334,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1343,9 +1343,9 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- fts0tfree((void*) b->yy_ch_buf ,yyscanner );
+ fts0tfree((void *) b->yy_ch_buf ,yyscanner );
- fts0tfree((void*) b ,yyscanner );
+ fts0tfree((void *) b ,yyscanner );
}
/* Initializes or reinitializes a buffer.
@@ -1356,7 +1356,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
{
int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
fts0t_flush_buffer(b ,yyscanner);
@@ -1383,7 +1383,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1413,7 +1413,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (new_buffer == NULL)
return;
@@ -1444,7 +1444,7 @@ void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
*/
void fts0tpop_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!YY_CURRENT_BUFFER)
return;
@@ -1465,7 +1465,7 @@ void fts0tpop_buffer_state (yyscan_t yyscanner)
static void fts0tensure_buffer_stack (yyscan_t yyscanner)
{
int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!yyg->yy_buffer_stack) {
@@ -1474,7 +1474,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0talloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0talloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
@@ -1493,7 +1493,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0trealloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0trealloc
(yyg->yy_buffer_stack,
num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
@@ -1510,7 +1510,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
* @param base the character buffer
* @param size the size in bytes of the character buffer
* @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
@@ -1571,7 +1571,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
- buf = (char*) fts0talloc(n ,yyscanner );
+ buf = (char *) fts0talloc(n ,yyscanner );
if ( ! buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_bytes()" );
@@ -1626,7 +1626,7 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute_
*/
YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyextra;
}
@@ -1635,7 +1635,7 @@ YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
*/
int fts0tget_lineno (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (! YY_CURRENT_BUFFER)
return 0;
@@ -1648,7 +1648,7 @@ int fts0tget_lineno (yyscan_t yyscanner)
*/
int fts0tget_column (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (! YY_CURRENT_BUFFER)
return 0;
@@ -1661,7 +1661,7 @@ int fts0tget_column (yyscan_t yyscanner)
*/
FILE *fts0tget_in (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyin;
}
@@ -1670,7 +1670,7 @@ FILE *fts0tget_in (yyscan_t yyscanner)
*/
FILE *fts0tget_out (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyout;
}
@@ -1679,7 +1679,7 @@ FILE *fts0tget_out (yyscan_t yyscanner)
*/
int fts0tget_leng (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyleng;
}
@@ -1689,7 +1689,7 @@ int fts0tget_leng (yyscan_t yyscanner)
char *fts0tget_text (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yytext;
}
@@ -1699,7 +1699,7 @@ char *fts0tget_text (yyscan_t yyscanner)
*/
void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyextra = user_defined ;
}
@@ -1709,11 +1709,11 @@ void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
*/
void fts0tset_lineno (int line_number , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* lineno is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
+ yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
yylineno = line_number;
}
@@ -1724,11 +1724,11 @@ void fts0tset_lineno (int line_number , yyscan_t yyscanner)
*/
void fts0tset_column (int column_no , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* column is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
+ yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
yycolumn = column_no;
}
@@ -1741,25 +1741,25 @@ void fts0tset_column (int column_no , yyscan_t yyscanner)
*/
void fts0tset_in (FILE * in_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyin = in_str ;
}
void fts0tset_out (FILE * out_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyout = out_str ;
}
int fts0tget_debug (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yy_flex_debug;
}
void fts0tset_debug (int bdebug , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_flex_debug = bdebug ;
}
@@ -1819,19 +1819,19 @@ int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = ENOMEM;
return 1;
}
-
+
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
fts0tset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
static int yy_init_globals (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Initialization is the same as for the non-reentrant scanner.
* This function is called from fts0tlex_destroy(), so don't allocate here.
*/
@@ -1839,7 +1839,7 @@ static int yy_init_globals (yyscan_t yyscanner)
yyg->yy_buffer_stack = 0;
yyg->yy_buffer_stack_top = 0;
yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char*) 0;
+ yyg->yy_c_buf_p = (char *) 0;
yyg->yy_init = 0;
yyg->yy_start = 0;
@@ -1852,8 +1852,8 @@ static int yy_init_globals (yyscan_t yyscanner)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -1865,7 +1865,7 @@ static int yy_init_globals (yyscan_t yyscanner)
/* fts0tlex_destroy is for both reentrant and non-reentrant scanners. */
int fts0tlex_destroy (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
@@ -1918,24 +1918,24 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__(
void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)))
{
- free( (char*) ptr ); /* see fts0trealloc() for (char*) cast */
+ free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l
index 8b04a9fecf1..8c42678ac7a 100644
--- a/storage/innobase/fts/fts0tlex.l
+++ b/storage/innobase/fts/fts0tlex.l
@@ -57,7 +57,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
return(FTS_TEXT);
}
-[^" \n]* {
+[^" \n\%]* {
val->token = strdup(fts0tget_text(yyscanner));
return(FTS_TERM);
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index b58dc486cfa..3ec778f3bec 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -32,9 +32,7 @@ Created 8/22/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
#include "page0page.h"
/*************************************************************//**
@@ -79,7 +77,6 @@ ha_create_func(
return(table);
}
-#ifndef UNIV_HOTBACKUP
if (type == MEM_HEAP_FOR_PAGE_HASH) {
/* We create a hash table protected by rw_locks for
buf_pool->page_hash. */
@@ -97,7 +94,6 @@ ha_create_func(
table->heaps[i] = mem_heap_create_typed(4096, type);
ut_a(table->heaps[i]);
}
-#endif /* !UNIV_HOTBACKUP */
return(table);
}
@@ -120,7 +116,6 @@ ha_clear(
|| rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
-#ifndef UNIV_HOTBACKUP
/* Free the memory heaps. */
n = table->n_sync_obj;
@@ -151,7 +146,6 @@ ha_clear(
table->n_sync_obj = 0;
table->type = HASH_TABLE_SYNC_NONE;
-#endif /* !UNIV_HOTBACKUP */
/* Clear the hash table. */
n = hash_get_n_cells(table);
@@ -179,7 +173,7 @@ ha_insert_for_fold_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block, /*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data) /*!< in: data, must not be NULL */
+ rec_t* data) /*!< in: data, must not be NULL */
{
hash_cell_t* cell;
ha_node_t* node;
@@ -215,7 +209,7 @@ ha_insert_for_fold_func(
prev_node->block = block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- prev_node->data = (rec_t*) data;
+ prev_node->data = data;
return(TRUE);
}
@@ -237,7 +231,7 @@ ha_insert_for_fold_func(
return(FALSE);
}
- ha_node_set_data(node, block, (rec_t*) data);
+ ha_node_set_data(node, block, data);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
diff --git a/storage/innobase/ha/hash0hash.cc b/storage/innobase/ha/hash0hash.cc
index 99128a676d5..174b6bcb57e 100644
--- a/storage/innobase/ha/hash0hash.cc
+++ b/storage/innobase/ha/hash0hash.cc
@@ -106,14 +106,14 @@ void
hash_mutex_exit_all_but(
/*====================*/
hash_table_t* table, /*!< in: hash table */
- mutex_t* keep_mutex) /*!< in: mutex to keep */
+ ib_mutex_t* keep_mutex) /*!< in: mutex to keep */
{
ulint i;
ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
for (i = 0; i < table->n_sync_obj; i++) {
- mutex_t* mutex = table->sync_obj.mutexes + i;
+ ib_mutex_t* mutex = table->sync_obj.mutexes + i;
if (UNIV_LIKELY(keep_mutex != mutex)) {
mutex_exit(mutex);
}
@@ -373,8 +373,8 @@ hash_create_sync_obj_func(
switch (type) {
case HASH_TABLE_SYNC_MUTEX:
- table->sync_obj.mutexes = static_cast<mutex_t*>(
- mem_alloc(n_sync_obj * sizeof(mutex_t)));
+ table->sync_obj.mutexes = static_cast<ib_mutex_t*>(
+ mem_alloc(n_sync_obj * sizeof(ib_mutex_t)));
for (i = 0; i < n_sync_obj; i++) {
mutex_create(hash_table_mutex_key,
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e19fe47e81a..44bbe20c8d3 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3,6 +3,7 @@
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -36,8 +37,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <sql_acl.h> // PROCESS_ACL
#include <debug_sync.h> // DEBUG_SYNC
+#include <my_base.h> // HA_OPTION_*
#include <mysys_err.h>
#include <innodb_priv.h>
+
#ifdef _WIN32
#include <io.h>
#endif
@@ -57,8 +60,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "srv0srv.h"
#include "trx0roll.h"
#include "trx0trx.h"
+
#include "trx0sys.h"
#include "mtr0mtr.h"
+#include "rem0types.h"
#include "row0ins.h"
#include "row0mysql.h"
#include "row0sel.h"
@@ -75,14 +80,24 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "row0merge.h"
#include "dict0boot.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "ha_prototypes.h"
#include "ut0mem.h"
#include "ibuf0ibuf.h"
#include "dict0dict.h"
#include "srv0mon.h"
+#include "api0api.h"
+#include "api0misc.h"
#include "pars0pars.h"
#include "fts0fts.h"
#include "fts0types.h"
+#include "row0import.h"
+#include "row0quiesce.h"
+#ifdef UNIV_DEBUG
+#include "trx0purge.h"
+#endif /* UNIV_DEBUG */
+#include "fts0priv.h"
+#include "page0zip.h"
#include "ha_innodb.h"
#include "i_s.h"
@@ -112,11 +127,9 @@ static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;
static long innobase_mirrored_log_groups;
-static long innobase_log_files_in_group;
static long innobase_log_buffer_size;
static long innobase_additional_mem_pool_size;
static long innobase_file_io_threads;
-static long innobase_force_recovery;
static long innobase_open_files;
static long innobase_autoinc_lock_mode;
static ulong innobase_commit_concurrency = 0;
@@ -134,12 +147,13 @@ static uint innobase_old_blocks_pct;
of the buffer pool. */
static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
+static ulong innobase_compression_level = DEFAULT_COMPRESSION_LEVEL;
+
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
static char* innobase_data_home_dir = NULL;
static char* innobase_data_file_path = NULL;
-static char* innobase_log_group_home_dir = NULL;
static char* innobase_file_format_name = NULL;
static char* innobase_change_buffering = NULL;
static char* innobase_enable_monitor_counter = NULL;
@@ -176,7 +190,6 @@ static my_bool innobase_stats_on_metadata = TRUE;
static my_bool innobase_large_prefix = FALSE;
static my_bool innodb_optimize_fulltext_only = FALSE;
-
static char* internal_innobase_data_file_path = NULL;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
@@ -250,6 +263,11 @@ const struct _ft_vft ft_vft_result = {NULL,
innobase_fts_retrieve_ranking,
NULL};
+const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
+ innobase_fts_flags,
+ innobase_fts_retrieve_docid,
+ innobase_fts_count_matches};
+
#ifdef HAVE_PSI_INTERFACE
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
@@ -262,8 +280,7 @@ static mysql_pfs_key_t pending_checkpoint_mutex_key;
static PSI_mutex_info all_pthread_mutexes[] = {
{&commit_threads_m_key, "commit_threads_m", 0},
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
+ {&innobase_share_mutex_key, "innobase_share_mutex", 0}
};
static PSI_cond_info all_innodb_conds[] = {
@@ -306,8 +323,10 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_MEM_DEBUG */
{&mem_pool_mutex_key, "mem_pool_mutex", 0},
{&mutex_list_mutex_key, "mutex_list_mutex", 0},
+ {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0},
{&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
{&recv_sys_mutex_key, "recv_sys_mutex", 0},
+ {&recv_writer_mutex_key, "recv_writer_mutex", 0},
{&rseg_mutex_key, "rseg_mutex", 0},
# ifdef UNIV_SYNC_DEBUG
{&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0},
@@ -336,8 +355,12 @@ static PSI_mutex_info all_innodb_mutexes[] = {
#ifndef HAVE_ATOMIC_BUILTINS
{&srv_conc_mutex_key, "srv_conc_mutex", 0},
#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+ {&monitor_mutex_key, "monitor_mutex", 0},
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
{&ut_list_mutex_key, "ut_list_mutex", 0},
{&trx_sys_mutex_key, "trx_sys_mutex", 0},
+ {&zip_pad_mutex_key, "zip_pad_mutex", 0},
};
# endif /* UNIV_PFS_MUTEX */
@@ -364,6 +387,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
{&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0},
{&trx_purge_latch_key, "trx_purge_latch", 0},
{&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
+ {&index_online_log_key, "index_online_log", 0},
{&dict_table_stats_latch_key, "dict_table_stats", 0},
{&hash_table_rw_lock_key, "hash table locks", 0}
};
@@ -381,7 +405,8 @@ static PSI_thread_info all_innodb_threads[] = {
{&srv_monitor_thread_key, "srv_monitor_thread", 0},
{&srv_master_thread_key, "srv_master_thread", 0},
{&srv_purge_thread_key, "srv_purge_thread", 0},
- {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}
+ {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0},
+ {&recv_writer_thread_key, "recovery writer thread", 0}
};
# endif /* UNIV_PFS_THREAD */
@@ -396,6 +421,70 @@ static PSI_file_info all_innodb_files[] = {
# endif /* UNIV_PFS_IO */
#endif /* HAVE_PSI_INTERFACE */
+/** Always normalize table name to lower case on Windows */
+#ifdef __WIN__
+#define normalize_table_name(norm_name, name) \
+ normalize_table_name_low(norm_name, name, TRUE)
+#else
+#define normalize_table_name(norm_name, name) \
+ normalize_table_name_low(norm_name, name, FALSE)
+#endif /* __WIN__ */
+
+/** Set up InnoDB API callback function array */
+ib_cb_t innodb_api_cb[] = {
+ (ib_cb_t) ib_cursor_open_table,
+ (ib_cb_t) ib_cursor_read_row,
+ (ib_cb_t) ib_cursor_insert_row,
+ (ib_cb_t) ib_cursor_delete_row,
+ (ib_cb_t) ib_cursor_update_row,
+ (ib_cb_t) ib_cursor_moveto,
+ (ib_cb_t) ib_cursor_first,
+ (ib_cb_t) ib_cursor_next,
+ (ib_cb_t) ib_cursor_last,
+ (ib_cb_t) ib_cursor_set_match_mode,
+ (ib_cb_t) ib_sec_search_tuple_create,
+ (ib_cb_t) ib_clust_read_tuple_create,
+ (ib_cb_t) ib_tuple_delete,
+ (ib_cb_t) ib_tuple_copy,
+ (ib_cb_t) ib_tuple_read_u32,
+ (ib_cb_t) ib_tuple_write_u32,
+ (ib_cb_t) ib_tuple_read_u64,
+ (ib_cb_t) ib_tuple_write_u64,
+ (ib_cb_t) ib_tuple_read_i32,
+ (ib_cb_t) ib_tuple_write_i32,
+ (ib_cb_t) ib_tuple_read_i64,
+ (ib_cb_t) ib_tuple_write_i64,
+ (ib_cb_t) ib_tuple_get_n_cols,
+ (ib_cb_t) ib_col_set_value,
+ (ib_cb_t) ib_col_get_value,
+ (ib_cb_t) ib_col_get_meta,
+ (ib_cb_t) ib_trx_begin,
+ (ib_cb_t) ib_trx_commit,
+ (ib_cb_t) ib_trx_rollback,
+ (ib_cb_t) ib_trx_start,
+ (ib_cb_t) ib_trx_release,
+ (ib_cb_t) ib_trx_state,
+ (ib_cb_t) ib_cursor_lock,
+ (ib_cb_t) ib_cursor_close,
+ (ib_cb_t) ib_cursor_new_trx,
+ (ib_cb_t) ib_cursor_reset,
+ (ib_cb_t) ib_open_table_by_name,
+ (ib_cb_t) ib_col_get_name,
+ (ib_cb_t) ib_table_truncate,
+ (ib_cb_t) ib_cursor_open_index_using_name,
+ (ib_cb_t) ib_close_thd,
+ (ib_cb_t) ib_cfg_get_cfg,
+ (ib_cb_t) ib_cursor_set_cluster_access,
+ (ib_cb_t) ib_cursor_commit_trx,
+ (ib_cb_t) ib_cfg_trx_level,
+ (ib_cb_t) ib_tuple_get_n_user_cols,
+ (ib_cb_t) ib_cursor_set_lock_mode,
+ (ib_cb_t) ib_cursor_clear_trx,
+ (ib_cb_t) ib_get_idx_field_name,
+ (ib_cb_t) ib_trx_get_start_time,
+ (ib_cb_t) ib_cfg_bk_commit_interval
+};
+
/*************************************************************//**
Check whether valid argument given to innodb_ft_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -410,24 +499,10 @@ innodb_stopword_table_validate(
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value); /*!< in: incoming string */
-/****************************************************************//**
-Update the session variable innodb_session_stopword_table
-with the "saved" stopword table name value. This function
-is registered as a callback with MySQL. */
-static
-void
-innodb_session_stopword_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save); /*!< in: immediate result
- from check function */
-/** "GEN_CLUST_INDEX" is the name reserved for Innodb default
-system primary index. */
-static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX";
+
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
static const char innobase_hton_name[]= "InnoDB";
@@ -450,19 +525,14 @@ static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
NULL, NULL,
/* default */ TRUE);
-static MYSQL_THDVAR_BOOL(analyze_is_persistent, PLUGIN_VAR_OPCMDARG,
- "ANALYZE TABLE in InnoDB uses a more precise (and slow) sampling "
- "algorithm and saves the results persistently.",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ FALSE);
-
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
"Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
-static MYSQL_THDVAR_STR(ft_user_stopword_table, PLUGIN_VAR_OPCMDARG,
+static MYSQL_THDVAR_STR(ft_user_stopword_table,
+ PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
"User supplied stopword table name, effective in the session level.",
- innodb_stopword_table_validate, innodb_session_stopword_update, NULL);
+ innodb_stopword_table_validate, NULL, NULL);
static SHOW_VAR innodb_status_variables[]= {
{"buffer_pool_dump_status",
@@ -471,8 +541,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
+ {"buffer_pool_bytes_data",
+ (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
{"buffer_pool_pages_dirty",
(char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
+ {"buffer_pool_bytes_dirty",
+ (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
{"buffer_pool_pages_flushed",
(char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
{"buffer_pool_pages_free",
@@ -567,6 +641,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
{"available_undo_logs",
(char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
+#ifdef UNIV_DEBUG
+ {"purge_trx_id_age",
+ (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
+ {"purge_view_trx_id_age",
+ (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
+#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG}
};
@@ -598,18 +678,8 @@ innobase_close_connection(
THD* thd); /*!< in: MySQL thread handle for
which to close the connection */
-static
-void
-innobase_commit_ordered(
-/*======================*/
- handlerton *hton, /*!< in/out: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle */
- bool all); /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement
- ended */
-static
-void
-innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
+static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
@@ -684,14 +754,7 @@ innobase_release_savepoint(
savepoint should be released */
void* savepoint); /*!< in: savepoint data */
-/*****************************************************************//**
-Handle a commit checkpoint request from server layer.
-We simply flush the redo log immediately and do the notify call.*/
-static
-void
-innobase_checkpoint_request(
- handlerton *hton,
- void *cookie);
+static void innobase_checkpoint_request(handlerton *hton, void *cookie);
/************************************************************************//**
Function for constructing an InnoDB table handler instance. */
@@ -745,13 +808,6 @@ int
innobase_file_format_validate_and_set(
/*==================================*/
const char* format_max); /*!< in: parameter value */
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags);
/*******************************************************************//**
This function is used to prepare an X/Open XA distributed transaction.
@@ -925,6 +981,21 @@ innodb_enable_monitor_at_startup(
/*=============================*/
char* str); /*!< in: monitor counter enable list */
+/*********************************************************************
+Normalizes a table name string. A normalized name consists of the
+database name catenated to '/' and table name. An example:
+test/mytable. On Windows normalization puts both the database name and the
+table name always to lower case if "set_lower_case" is set to TRUE. */
+static
+void
+normalize_table_name_low(
+/*=====================*/
+ char* norm_name, /* out: normalized name as a
+ null-terminated string */
+ const char* name, /* in: table name string */
+ ibool set_lower_case); /* in: TRUE if we want to set
+ name to lower case */
+
/*************************************************************//**
Check for a valid value of innobase_commit_concurrency.
@return 0 for valid innodb_commit_concurrency */
@@ -967,7 +1038,7 @@ innobase_create_handler(
TABLE_SHARE* table,
MEM_ROOT* mem_root)
{
- return new (mem_root) ha_innobase(hton, table);
+ return(new (mem_root) ha_innobase(hton, table));
}
/* General functions */
@@ -1008,9 +1079,22 @@ UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
+{
+ return((ibool) thd_slave_thread(thd));
+}
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer. @return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
{
- return((ibool) thd_slave_thread((THD*) thd));
+ return(thd_get_durability_property(thd));
}
/******************************************************************//**
@@ -1020,10 +1104,9 @@ UNIV_INTERN
ibool
thd_trx_is_read_only(
/*=================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
{
- /* Waiting on WL#6046 to complete. */
- return(FALSE);
+ return(thd != 0 && thd_tx_is_read_only(thd));
}
/******************************************************************//**
@@ -1034,11 +1117,11 @@ UNIV_INTERN
ibool
thd_trx_is_auto_commit(
/*===================*/
- void* thd) /*!< in: thread handle (THD*) can be NULL */
+ THD* thd) /*!< in: thread handle, can be NULL */
{
return(thd != NULL
&& !thd_test_options(
- static_cast<THD*>(thd),
+ thd,
OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
&& thd_is_select(thd));
}
@@ -1114,6 +1197,17 @@ innobase_srv_conc_force_exit_innodb(
}
/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname()
+/*=================*/
+{
+ return(glob_hostname);
+}
+
+/******************************************************************//**
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
@@ -1123,9 +1217,9 @@ UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
{
- return((ibool) thd_non_transactional_update((THD*) thd));
+ return((ibool) thd_non_transactional_update(thd));
}
/******************************************************************//**
@@ -1135,9 +1229,9 @@ UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- const void* thd) /*!< in: thread handle (THD*) */
+ const THD* thd) /*!< in: thread handle */
{
- return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT);
+ return(thd_sql_command(thd) == SQLCOM_SELECT);
}
/******************************************************************//**
@@ -1148,10 +1242,10 @@ UNIV_INTERN
ibool
thd_supports_xa(
/*============*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
+ THD* thd) /*!< in: thread handle, or NULL to query
the global innodb_supports_xa */
{
- return(THDVAR((THD*) thd, support_xa));
+ return(THDVAR(thd, support_xa));
}
/******************************************************************//**
@@ -1161,12 +1255,12 @@ UNIV_INTERN
ulong
thd_lock_wait_timeout(
/*==================*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
+ THD* thd) /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
{
/* According to <mysql/plugin.h>, passing thd == NULL
returns the global value of the session variable. */
- return(THDVAR((THD*) thd, lock_wait_timeout));
+ return(THDVAR(thd, lock_wait_timeout));
}
/******************************************************************//**
@@ -1175,17 +1269,18 @@ UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
- void* thd, /*!< in: thread handle (THD*) */
+ THD* thd, /*!< in/out: thread handle */
ulint value) /*!< in: time waited for the lock */
{
if (thd) {
- thd_storage_lock_wait((THD*) thd, value);
+ thd_storage_lock_wait(thd, value);
}
}
/********************************************************************//**
Obtain the InnoDB transaction of a MySQL thread.
@return reference to transaction pointer */
+__attribute__((warn_unused_result, nonnull))
static inline
trx_t*&
thd_to_trx(
@@ -1245,11 +1340,11 @@ Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.
@return MySQL error code */
-UNIV_INTERN
+static
int
convert_error_code_to_mysql(
/*========================*/
- int error, /*!< in: InnoDB error code */
+ dberr_t error, /*!< in: InnoDB error code */
ulint flags, /*!< in: InnoDB table flags, or 0 */
THD* thd) /*!< in: user thread handle or NULL */
{
@@ -1287,7 +1382,7 @@ convert_error_code_to_mysql(
return(HA_ERR_FOUND_DUPP_KEY);
case DB_READ_ONLY:
- return(HA_ERR_READ_ONLY_TRANSACTION);
+ return(HA_ERR_TABLE_READONLY);
case DB_FOREIGN_DUPLICATE_KEY:
return(HA_ERR_FOREIGN_DUPLICATE_KEY);
@@ -1344,12 +1439,19 @@ convert_error_code_to_mysql(
case DB_OUT_OF_FILE_SPACE:
return(HA_ERR_RECORD_FILE_FULL);
+ case DB_TABLE_IN_FK_CHECK:
+ return(HA_ERR_TABLE_IN_FK_CHECK);
+
case DB_TABLE_IS_BEING_USED:
return(HA_ERR_WRONG_COMMAND);
+ case DB_TABLESPACE_DELETED:
case DB_TABLE_NOT_FOUND:
return(HA_ERR_NO_SUCH_TABLE);
+ case DB_TABLESPACE_NOT_FOUND:
+ return(HA_ERR_NO_SUCH_TABLE);
+
case DB_TOO_BIG_RECORD: {
/* If prefix is true then a 768-byte prefix is stored
locally for BLOB fields. Refer to dict_table_get_format() */
@@ -1365,7 +1467,7 @@ convert_error_code_to_mysql(
"or ROW_FORMAT=COMPRESSED ": "",
prefix ? DICT_MAX_FIXED_COL_LEN : 0);
return(HA_ERR_TO_BIG_ROW);
- }
+ }
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
@@ -1386,27 +1488,21 @@ convert_error_code_to_mysql(
return(HA_ERR_LOCK_TABLE_FULL);
- case DB_PRIMARY_KEY_IS_NULL:
- return(ER_PRIMARY_CANT_HAVE_NULL);
-
case DB_FTS_INVALID_DOCID:
return(HA_FTS_INVALID_DOCID);
case DB_TOO_MANY_CONCURRENT_TRXS:
- /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only
- available in 5.1.38 and later, but the plugin should still
- work with previous versions of MySQL. */
-#ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS
return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
-#else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
- return(HA_ERR_RECORD_FILE_FULL);
-#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
case DB_UNSUPPORTED:
return(HA_ERR_UNSUPPORTED);
case DB_INDEX_CORRUPT:
return(HA_ERR_INDEX_CORRUPT);
case DB_UNDO_RECORD_TOO_BIG:
return(HA_ERR_UNDO_REC_TOO_BIG);
+ case DB_OUT_OF_MEMORY:
+ return(HA_ERR_OUT_OF_MEM);
+ case DB_TABLESPACE_EXISTS:
+ return(HA_ERR_TABLESPACE_EXISTS);
}
}
@@ -1417,18 +1513,30 @@ void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
+ THD* thd, /*!< in: MySQL THD object */
uint max_query_len) /*!< in: max query length to print, or 0 to
use the default max length */
{
char buffer[1024];
- fputs(thd_security_context((THD*) thd, buffer, sizeof buffer,
+ fputs(thd_security_context(thd, buffer, sizeof buffer,
max_query_len), f);
putc('\n', f);
}
/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+ int error_code) /*!< in: MySQL error code */
+{
+ return(my_get_err_msg(error_code));
+}
+
+/******************************************************************//**
Get the variable length bounds of the given character set. */
UNIV_INTERN
void
@@ -1456,7 +1564,7 @@ innobase_get_cset_width(
/* Fix bug#46256: allow tables to be dropped if the
collation is not found, but issue a warning. */
- if ((global_system_variables.log_warnings)
+ if ((log_warnings)
&& (cset != 0)){
sql_print_warning(
@@ -1572,9 +1680,9 @@ UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- void* mysql_thd) /*!< in: MySQL thread handle */
+ THD* mysql_thd) /*!< in: MySQL thread handle */
{
- return(thd_charset((THD*) mysql_thd));
+ return(thd_charset(mysql_thd));
}
/**********************************************************************//**
@@ -1584,12 +1692,12 @@ UNIV_INTERN
const char*
innobase_get_stmt(
/*==============*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
size_t* length) /*!< out: length of the SQL statement */
{
LEX_STRING* stmt;
- stmt = thd_query_string((THD*) mysql_thd);
+ stmt = thd_query_string(thd);
*length = stmt->length;
return(stmt->str);
}
@@ -1621,99 +1729,6 @@ innobase_get_lower_case_table_names(void)
return(lower_case_table_names);
}
-#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
-extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
-/*******************************************************************//**
-Map an OS error to an errno value. The OS error number is stored in
-_doserrno and the mapped value is stored in errno) */
-void __cdecl
-_dosmaperr(
- unsigned long); /*!< in: OS error value */
-
-/*********************************************************************//**
-Creates a temporary file.
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
- int fd; /* handle of opened file */
- HANDLE osfh; /* OS handle of opened file */
- char* tmpdir; /* point to the directory
- where to create file */
- TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
- The length cannot be longer
- than MAX_PATH - 14, or
- GetTempFileName will fail. */
- char filename[MAX_PATH]; /* name of the tmpfile */
- DWORD fileaccess = GENERIC_READ /* OS file access */
- | GENERIC_WRITE
- | DELETE;
- DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
- | FILE_SHARE_WRITE
- | FILE_SHARE_DELETE;
- DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
- DWORD fileattrib = /* OS file attribute flags */
- FILE_ATTRIBUTE_NORMAL
- | FILE_FLAG_DELETE_ON_CLOSE
- | FILE_ATTRIBUTE_TEMPORARY
- | FILE_FLAG_SEQUENTIAL_SCAN;
-
- DBUG_ENTER("innobase_mysql_tmpfile");
-
- tmpdir = my_tmpdir(&mysql_tmpdir_list);
-
- /* The tmpdir parameter can not be NULL for GetTempFileName. */
- if (!tmpdir) {
- uint ret;
-
- /* Use GetTempPath to determine path for temporary files. */
- ret = GetTempPath(sizeof(path_buf), path_buf);
- if (ret > sizeof(path_buf) || (ret == 0)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- tmpdir = path_buf;
- }
-
- /* Use GetTempFileName to generate a unique filename. */
- if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- DBUG_PRINT("info", ("filename: %s", filename));
-
- /* Open/Create the file. */
- osfh = CreateFile(filename, fileaccess, fileshare, NULL,
- filecreate, fileattrib, NULL);
- if (osfh == INVALID_HANDLE_VALUE) {
-
- /* open/create file failed! */
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- do {
- /* Associates a CRT file descriptor with the OS file handle. */
- fd = _open_osfhandle((intptr_t) osfh, 0);
- } while (fd == -1 && errno == EINTR);
-
- if (fd == -1) {
- /* Open failed, close the file handle. */
-
- _dosmaperr(GetLastError()); /* map error */
- CloseHandle(osfh); /* no need to check if
- CloseHandle fails */
- }
-
- DBUG_RETURN(fd);
-}
-#else
/*********************************************************************//**
Creates a temporary file.
@return temporary file descriptor, or < 0 on error */
@@ -1724,6 +1739,9 @@ innobase_mysql_tmpfile(void)
{
int fd2 = -1;
File fd = mysql_tmpfile("ib");
+
+ DBUG_EXECUTE_IF("innobase_tmpfile_creation_failure", return(-1););
+
if (fd >= 0) {
/* Copy the file descriptor, so that the additional resources
allocated by create_temp_file() can be freed by invoking
@@ -1767,7 +1785,6 @@ innobase_mysql_tmpfile(void)
}
return(fd2);
}
-#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@@ -1845,11 +1862,11 @@ values we want to reserve for multi-value inserts e.g.,
INSERT INTO T VALUES(), (), ();
-innobase_next_autoinc() will be called with increment set to
-n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above.
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
@return the next value */
-static
+UNIV_INTERN
ulonglong
innobase_next_autoinc(
/*==================*/
@@ -1886,6 +1903,7 @@ innobase_next_autoinc(
in reality a negative value.The visual studio compilers converts
large double values automatically into unsigned long long datatype
maximum value */
+
if (block >= max_value
|| offset > max_value
|| current >= max_value
@@ -2055,7 +2073,7 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->active_commit_ordered = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
@@ -2082,6 +2100,78 @@ trx_is_started(
}
/*********************************************************************//**
+Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_create_info(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ HA_CREATE_INFO* create_info) /*!< in: create info */
+{
+ ibool ps_on;
+ ibool ps_off;
+
+ if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ /* Temp tables do not use persistent stats. */
+ ps_on = FALSE;
+ ps_off = TRUE;
+ } else {
+ ps_on = create_info->table_options
+ & HA_OPTION_STATS_PERSISTENT;
+ ps_off = create_info->table_options
+ & HA_OPTION_NO_STATS_PERSISTENT;
+ }
+
+ dict_stats_set_persistent(innodb_table, ps_on, ps_off);
+
+ dict_stats_auto_recalc_set(
+ innodb_table,
+ create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
+ create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
+
+ innodb_table->stats_sample_pages = create_info->stats_sample_pages;
+}
+
+/*********************************************************************//**
+Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_table_share(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ TABLE_SHARE* table_share) /*!< in: table share */
+{
+ ibool ps_on;
+ ibool ps_off;
+
+ if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ /* Temp tables do not use persistent stats */
+ ps_on = FALSE;
+ ps_off = TRUE;
+ } else {
+ ps_on = table_share->db_create_options
+ & HA_OPTION_STATS_PERSISTENT;
+ ps_off = table_share->db_create_options
+ & HA_OPTION_NO_STATS_PERSISTENT;
+ }
+
+ dict_stats_set_persistent(innodb_table, ps_on, ps_off);
+
+ dict_stats_auto_recalc_set(
+ innodb_table,
+ table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
+ table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
+
+ innodb_table->stats_sample_pages = table_share->stats_sample_pages;
+}
+
+/*********************************************************************//**
Construct ha_innobase handler. */
UNIV_INTERN
ha_innobase::ha_innobase(
@@ -2090,14 +2180,15 @@ ha_innobase::ha_innobase(
TABLE_SHARE* table_arg)
:handler(hton, table_arg),
int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
+ HA_NULL_IN_KEY |
HA_CAN_INDEX_BLOBS |
HA_CAN_SQL_HANDLER |
HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
HA_PRIMARY_KEY_IN_READ_INDEX |
HA_BINLOG_ROW_CAPABLE |
HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT),
+ HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT |
+ HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
start_of_scan(0),
num_write_row(0)
{}
@@ -2122,6 +2213,9 @@ ha_innobase::update_thd(
{
trx_t* trx;
+ /* The table should have been opened in ha_innobase::open(). */
+ DBUG_ASSERT(prebuilt->table->n_ref_count > 0);
+
trx = check_trx_exists(thd);
if (prebuilt->trx != trx) {
@@ -2209,7 +2303,9 @@ invalidation to the transaction commit.
2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
-that thd.
+that thd. Also the full_name which is used as key to search for the table
+object. The full_name is a string containing the normalized path to the
+table in the canonical format.
3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
@@ -2244,11 +2340,9 @@ innobase_query_caching_of_table_permitted(
THD* thd, /*!< in: thd of the user who is trying to
store a result to the query cache or
retrieve it */
- char* full_name, /*!< in: concatenation of database name,
- the null character NUL, and the table
- name */
- uint full_name_len, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
+ char* full_name, /*!< in: normalized path to the table */
+ uint full_name_len, /*!< in: length of the normalized path
+ to the table */
ulonglong *unused) /*!< unused for this engine */
{
ibool is_autocommit;
@@ -2308,16 +2402,7 @@ innobase_query_caching_of_table_permitted(
}
/* Normalize the table name to InnoDB format */
-
- memcpy(norm_name, full_name, full_name_len);
-
- norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
- separator between db and
- table */
- norm_name[full_name_len] = '\0';
-#ifdef __WIN__
- innobase_casedn_str(norm_name);
-#endif
+ normalize_table_name(norm_name, full_name);
innobase_register_trx(innodb_hton_ptr, thd, trx);
@@ -2355,7 +2440,7 @@ innobase_invalidate_query_cache(
/* Argument TRUE below means we are using transactions */
#ifdef HAVE_QUERY_CACHE
- mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
+ mysql_query_cache_invalidate4(trx->mysql_thd,
full_name,
(uint32) full_name_len,
TRUE);
@@ -2374,7 +2459,7 @@ innobase_convert_identifier(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool file_id)/*!< in: TRUE=id is a table or database name;
FALSE=id is an UTF-8 string */
{
@@ -2397,7 +2482,7 @@ innobase_convert_identifier(
nz[idlen] = 0;
s = nz2;
- idlen = explain_filename((THD*) thd, nz, nz2, sizeof nz2,
+ idlen = explain_filename(thd, nz, nz2, sizeof nz2,
EXPLAIN_PARTITIONS_AS_COMMENT);
goto no_quote;
}
@@ -2406,7 +2491,7 @@ innobase_convert_identifier(
if (UNIV_UNLIKELY(!thd)) {
q = '"';
} else {
- q = get_quote_char_for_identifier((THD*) thd, s, (int) idlen);
+ q = get_quote_char_for_identifier(thd, s, (int) idlen);
}
if (q == EOF) {
@@ -2462,7 +2547,7 @@ innobase_convert_name(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id)/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
{
@@ -2504,14 +2589,13 @@ no_db_name:
}
return(s);
-
}
/*****************************************************************//**
A wrapper function of innobase_convert_name(), convert a table or
index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
@return pointer to the end of buf */
-static inline
+UNIV_INTERN
void
innobase_format_name(
/*==================*/
@@ -2537,9 +2621,9 @@ UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- trx_t* trx) /*!< in: transaction */
+ const trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd));
+ return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
}
/**********************************************************************//**
@@ -2551,8 +2635,20 @@ trx_is_strict(
/*==========*/
trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd
- && THDVAR((THD*) trx->mysql_thd, strict_mode));
+ return(trx && trx->mysql_thd && THDVAR(trx->mysql_thd, strict_mode));
+}
+
+/**********************************************************************//**
+Determines if the current MySQL thread is running in strict mode.
+If thd==NULL, THDVAR returns the global value of innodb-strict-mode.
+@return TRUE if strict */
+UNIV_INLINE
+ibool
+thd_is_strict(
+/*==========*/
+ THD* thd) /*!< in: MySQL thread descriptor */
+{
+ return(THDVAR(thd, strict_mode));
}
/**************************************************************//**
@@ -2568,6 +2664,7 @@ ha_innobase::reset_template(void)
prebuilt->keep_other_fields_on_keyread = 0;
prebuilt->read_just_key = 0;
+ prebuilt->in_fts_query = 0;
/* Reset index condition pushdown state. */
if (prebuilt->idx_cond) {
prebuilt->idx_cond = NULL;
@@ -2663,14 +2760,14 @@ innobase_init(
innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
innobase_hton->savepoint_release = innobase_release_savepoint;
innobase_hton->prepare_ordered= NULL;
- innobase_hton->commit_ordered= innobase_commit_ordered;
+ innobase_hton->commit_ordered= innobase_commit_ordered;
innobase_hton->commit = innobase_commit;
innobase_hton->rollback = innobase_rollback;
innobase_hton->prepare = innobase_xa_prepare;
innobase_hton->recover = innobase_xa_recover;
innobase_hton->commit_by_xid = innobase_commit_by_xid;
innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
- innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
+ innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->create_cursor_read_view = innobase_create_cursor_view;
innobase_hton->set_cursor_read_view = innobase_set_cursor_view;
innobase_hton->close_cursor_read_view = innobase_close_cursor_view;
@@ -2687,9 +2784,8 @@ innobase_init(
innobase_hton->release_temporary_latches =
innobase_release_temporary_latches;
-
- innobase_hton->alter_table_flags = innobase_alter_table_flags;
- innobase_hton->kill_query = innobase_kill_query;
+ innobase_hton->kill_query = innobase_kill_query;
+ innobase_hton->data = &innodb_api_cb;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -2756,12 +2852,12 @@ innobase_init(
srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
default_path);
- /* Set default InnoDB data file size to 10 MB and let it be
+ /* Set default InnoDB data file size to 12 MB and let it be
auto-extending. Thus users can use InnoDB in >= 4.0 without having
to specify any startup options. */
if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
+ innobase_data_file_path = (char*) "ibdata1:12M:autoextend";
}
/* Since InnoDB edits the argument in the next call, we make another
@@ -2785,8 +2881,8 @@ mem_free_and_error:
/* The default dir for log files is the datadir of MySQL */
- if (!innobase_log_group_home_dir) {
- innobase_log_group_home_dir = default_path;
+ if (!srv_log_group_home_dir) {
+ srv_log_group_home_dir = default_path;
}
#ifdef UNIV_LOG_ARCHIVE
@@ -2799,12 +2895,12 @@ mem_free_and_error:
srv_arch_dir = innobase_log_arch_dir;
#endif /* UNIG_LOG_ARCHIVE */
- ret = (bool)
- srv_parse_log_group_home_dirs(innobase_log_group_home_dir);
+ srv_normalize_path_for_win(srv_log_group_home_dir);
- if (ret == FALSE || innobase_mirrored_log_groups != 1) {
- sql_print_error("syntax error in innodb_log_group_home_dir, or a "
- "wrong number of mirrored log groups");
+ if (strchr(srv_log_group_home_dir, ';')
+ || innobase_mirrored_log_groups != 1) {
+ sql_print_error("syntax error in innodb_log_group_home_dir, "
+ "or a wrong number of mirrored log groups");
goto mem_free_and_error;
}
@@ -2896,12 +2992,52 @@ innobase_change_buffering_inited_ok:
innobase_change_buffering = (char*)
innobase_change_buffering_values[ibuf_use];
+ /* Check that interdependent parameters have sane values. */
+ if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
+ sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
+ " cannot be set higher than"
+ " innodb_max_dirty_pages_pct.\n"
+ "InnoDB: Setting"
+ " innodb_max_dirty_pages_pct_lwm to %lu\n",
+ srv_max_buf_pool_modified_pct);
+
+ srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
+ }
+
+ if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {
+
+ if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
+ /* Avoid overflow. */
+ srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
+ } else {
+ /* The user has not set the value. We should
+ set it based on innodb_io_capacity. */
+ srv_max_io_capacity =
+ ut_max(2 * srv_io_capacity, 2000);
+ }
+
+ } else if (srv_max_io_capacity < srv_io_capacity) {
+ sql_print_warning("InnoDB: innodb_io_capacity"
+ " cannot be set higher than"
+ " innodb_io_capacity_max.\n"
+ "InnoDB: Setting"
+ " innodb_io_capacity to %lu\n",
+ srv_max_io_capacity);
+
+ srv_io_capacity = srv_max_io_capacity;
+ }
+
+ if (!is_filename_allowed(srv_buf_dump_filename,
+ strlen(srv_buf_dump_filename), FALSE)) {
+ sql_print_error("InnoDB: innodb_buffer_pool_filename"
+ " cannot have colon (:) in the file name.");
+ goto mem_free_and_error;
+ }
+
/* --------------------------------------------------*/
srv_file_flush_method_str = innobase_file_flush_method;
- srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
- srv_n_log_files = (ulint) innobase_log_files_in_group;
srv_log_file_size = (ib_uint64_t) innobase_log_file_size;
#ifdef UNIV_LOG_ARCHIVE
@@ -2927,6 +3063,18 @@ innobase_change_buffering_inited_ok:
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
+ if (innobase_buffer_pool_instances == 0) {
+ innobase_buffer_pool_instances = 8;
+
+#if defined(__WIN__) && !defined(_WIN64)
+ if (innobase_buffer_pool_size > 1331 * 1024 * 1024) {
+ innobase_buffer_pool_instances
+ = ut_min(MAX_BUFFER_POOLS,
+ (long) (innobase_buffer_pool_size
+ / (128 * 1024 * 1024)));
+ }
+#endif /* defined(__WIN__) && !defined(_WIN64) */
+ }
srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
@@ -2959,9 +3107,10 @@ innobase_change_buffering_inited_ok:
srv_n_read_io_threads = (ulint) innobase_read_io_threads;
srv_n_write_io_threads = (ulint) innobase_write_io_threads;
- srv_force_recovery = (ulint) innobase_force_recovery;
-
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+
+ page_compression_level = (ulint) innobase_compression_level;
+
if (!innobase_use_checksums) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -2992,6 +3141,12 @@ innobase_change_buffering_inited_ok:
"level instead, see " REFMAN "set-transaction.html.\n");
}
+ if (innobase_open_files < 10) {
+ innobase_open_files = 300;
+ if (srv_file_per_table && table_cache_size > 300) {
+ innobase_open_files = table_cache_size;
+ }
+ }
srv_max_n_open_files = (ulint) innobase_open_files;
srv_innodb_status = (ibool) innobase_create_status_file;
@@ -3059,7 +3214,7 @@ innobase_change_buffering_inited_ok:
/* Since we in this module access directly the fields of a trx
struct, and due to different headers and flags it might happen that
- mutex_t has a different size in this module and in InnoDB
+ ib_mutex_t has a different size in this module and in InnoDB
modules, we check at run time that the size is the same in
these compilation modules. */
@@ -3174,28 +3329,13 @@ innobase_flush_logs(
DBUG_ENTER("innobase_flush_logs");
DBUG_ASSERT(hton == innodb_hton_ptr);
- log_buffer_flush_to_disk();
+ if (!srv_read_only_mode) {
+ log_buffer_flush_to_disk();
+ }
DBUG_RETURN(result);
}
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags)
-{
- return(HA_INPLACE_ADD_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_INDEX_NO_WRITE
- | HA_INPLACE_DROP_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE
- | HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
-}
-
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -3410,9 +3550,6 @@ innobase_commit(
innobase_commit_ordered_2(trx, thd);
}
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
/* We did the first part already in innobase_commit_ordered(),
Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
@@ -3462,7 +3599,7 @@ innobase_rollback(
transaction FALSE - rollback the current
statement only */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
DBUG_ENTER("innobase_rollback");
@@ -3511,7 +3648,7 @@ innobase_rollback_trx(
/*==================*/
trx_t* trx) /*!< in: transaction */
{
- int error = 0;
+ dberr_t error = DB_SUCCESS;
DBUG_ENTER("innobase_rollback_trx");
DBUG_PRINT("trans", ("aborting transaction"));
@@ -3610,6 +3747,7 @@ innobase_checkpoint_request(
Log code calls this whenever log has been written and/or flushed up
to a new position. We use this to notify upper layer of a new commit
checkpoint when necessary.*/
+extern "C" UNIV_INTERN
void
innobase_mysql_log_notify(
/*===============*/
@@ -3692,7 +3830,7 @@ innobase_rollback_to_savepoint(
void* savepoint) /*!< in: savepoint data */
{
ib_int64_t mysql_binlog_cache_pos;
- int error = 0;
+ dberr_t error;
trx_t* trx;
char name[64];
@@ -3713,7 +3851,7 @@ innobase_rollback_to_savepoint(
longlong2str((ulint) savepoint, name, 36);
- error = (int) trx_rollback_to_savepoint_for_mysql(
+ error = trx_rollback_to_savepoint_for_mysql(
trx, name, &mysql_binlog_cache_pos);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
@@ -3737,7 +3875,7 @@ innobase_release_savepoint(
savepoint should be released */
void* savepoint) /*!< in: savepoint data */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
char name[64];
@@ -3750,7 +3888,7 @@ innobase_release_savepoint(
longlong2str((ulint) savepoint, name, 36);
- error = (int) trx_release_savepoint_for_mysql(trx, name);
+ error = trx_release_savepoint_for_mysql(trx, name);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
fts_savepoint_release(trx, name);
@@ -3770,7 +3908,7 @@ innobase_savepoint(
THD* thd, /*!< in: handle to the MySQL thread */
void* savepoint) /*!< in: savepoint data */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
DBUG_ENTER("innobase_savepoint");
@@ -3797,7 +3935,7 @@ innobase_savepoint(
char name[64];
longlong2str((ulint) savepoint,name,36);
- error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
+ error = trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
fts_savepoint_take(trx, name);
@@ -3831,7 +3969,7 @@ innobase_close_connection(
"but transaction is active");
}
- if (trx_is_started(trx) && global_system_variables.log_warnings) {
+ if (trx_is_started(trx) && log_warnings) {
sql_print_warning(
"MySQL is closing a connection that has an active "
@@ -3848,6 +3986,27 @@ innobase_close_connection(
}
/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+ THD* thd) /*!< in: handle to the MySQL thread of the user
+ whose resources should be free'd */
+{
+ trx_t* trx = thd_to_trx(thd);
+
+ if (!trx) {
+ return(0);
+ }
+
+ return(innobase_close_connection(innodb_hton_ptr, thd));
+}
+
+UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
+
+/*****************************************************************//**
Cancel any pending lock request associated with the current THD. */
static
void
@@ -3862,10 +4021,17 @@ innobase_kill_query(
DBUG_ASSERT(hton == innodb_hton_ptr);
trx = thd_to_trx(thd);
- /* Cancel a pending lock request. */
- if (trx) {
- lock_trx_handle_wait(trx);
- }
+
+ if (trx)
+ {
+ /* Cancel a pending lock request. */
+ lock_mutex_enter();
+ trx_mutex_enter(trx);
+ if (trx->lock.wait_lock)
+ lock_cancel_waiting_and_release(trx->lock.wait_lock);
+ trx_mutex_exit(trx);
+ lock_mutex_exit();
+ }
DBUG_VOID_RETURN;
}
@@ -3981,9 +4147,9 @@ ha_innobase::index_flags(
uint,
bool) const
{
- ulong extra_flag= 0;
- if (key == table_share->primary_key)
- extra_flag= HA_CLUSTERED_INDEX;
+ ulong extra_flag= 0;
+ if (table && key == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
return((table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT)
? 0
: (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
@@ -4065,19 +4231,10 @@ ha_innobase::primary_key_is_clustered()
return(true);
}
-/** Always normalize table name to lower case on Windows */
-#ifdef __WIN__
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, TRUE)
-#else
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, FALSE)
-#endif /* __WIN__ */
-
/*****************************************************************//**
Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
+database name catenated to '/' and table name. Example: test/mytable.
+On Windows normalization puts both the database name and the
table name always to lower case if "set_lower_case" is set to TRUE. */
static
void
@@ -4090,9 +4247,11 @@ normalize_table_name_low(
to lower case */
{
char* name_ptr;
+ ulint name_len;
char* db_ptr;
ulint db_len;
char* ptr;
+ ulint norm_len;
/* Scan name from the end */
@@ -4104,6 +4263,7 @@ normalize_table_name_low(
}
name_ptr = ptr + 1;
+ name_len = strlen(name_ptr);
/* skip any number of path separators */
while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
@@ -4122,11 +4282,15 @@ normalize_table_name_low(
db_ptr = ptr + 1;
+ norm_len = db_len + name_len + sizeof "/";
+ ut_a(norm_len < FN_REFLEN - 1);
+
memcpy(norm_name, db_ptr, db_len);
norm_name[db_len] = '/';
- memcpy(norm_name + db_len + 1, name_ptr, strlen(name_ptr) + 1);
+ /* Copy the name and null-byte. */
+ memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
if (set_lower_case) {
innobase_casedn_str(norm_name);
@@ -4141,7 +4305,7 @@ void
test_normalize_table_name_low()
/*===========================*/
{
- char norm_name[128];
+ char norm_name[FN_REFLEN];
const char* test_data[][2] = {
/* input, expected result */
{"./mysqltest/t1", "mysqltest/t1"},
@@ -4197,12 +4361,84 @@ test_normalize_table_name_low()
}
}
}
+
+/*********************************************************************
+Test ut_format_name(). */
+static
+void
+test_ut_format_name()
+/*=================*/
+{
+ char buf[NAME_LEN * 3];
+
+ struct {
+ const char* name;
+ ibool is_table;
+ ulint buf_size;
+ const char* expected;
+ } test_data[] = {
+ {"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""},
+ {"test/t1", TRUE, 12, "\"test\".\"t1\""},
+ {"test/t1", TRUE, 11, "\"test\".\"t1"},
+ {"test/t1", TRUE, 10, "\"test\".\"t"},
+ {"test/t1", TRUE, 9, "\"test\".\""},
+ {"test/t1", TRUE, 8, "\"test\"."},
+ {"test/t1", TRUE, 7, "\"test\""},
+ {"test/t1", TRUE, 6, "\"test"},
+ {"test/t1", TRUE, 5, "\"tes"},
+ {"test/t1", TRUE, 4, "\"te"},
+ {"test/t1", TRUE, 3, "\"t"},
+ {"test/t1", TRUE, 2, "\""},
+ {"test/t1", TRUE, 1, ""},
+ {"test/t1", TRUE, 0, "BUF_NOT_CHANGED"},
+ {"table", TRUE, sizeof(buf), "\"table\""},
+ {"ta'le", TRUE, sizeof(buf), "\"ta'le\""},
+ {"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""},
+ {"ta`le", TRUE, sizeof(buf), "\"ta`le\""},
+ {"index", FALSE, sizeof(buf), "\"index\""},
+ {"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""},
+ };
+
+ for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
+
+ memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
+
+ char* ret;
+
+ ret = ut_format_name(test_data[i].name,
+ test_data[i].is_table,
+ buf,
+ test_data[i].buf_size);
+
+ ut_a(ret == buf);
+
+ if (strcmp(buf, test_data[i].expected) == 0) {
+ fprintf(stderr,
+ "ut_format_name(%s, %s, buf, %lu), "
+ "expected %s, OK\n",
+ test_data[i].name,
+ test_data[i].is_table ? "TRUE" : "FALSE",
+ test_data[i].buf_size,
+ test_data[i].expected);
+ } else {
+ fprintf(stderr,
+ "ut_format_name(%s, %s, buf, %lu), "
+ "expected %s, ERROR: got %s\n",
+ test_data[i].name,
+ test_data[i].is_table ? "TRUE" : "FALSE",
+ test_data[i].buf_size,
+ test_data[i].expected,
+ buf);
+ ut_error;
+ }
+ }
+}
#endif /* !DBUG_OFF */
/********************************************************************//**
Get the upper limit of the MySQL integral and floating-point type.
@return maximum allowed value for the field */
-static
+UNIV_INTERN
ulonglong
innobase_get_int_col_max_value(
/*===========================*/
@@ -4282,12 +4518,13 @@ innobase_match_index_columns(
DBUG_ENTER("innobase_match_index_columns");
/* Check whether user defined index column count matches */
- if (key_info->key_parts != index_info->n_user_defined_cols) {
+ if (key_info->user_defined_key_parts !=
+ index_info->n_user_defined_cols) {
DBUG_RETURN(FALSE);
}
key_part = key_info->key_part;
- key_end = key_part + key_info->key_parts;
+ key_end = key_part + key_info->user_defined_key_parts;
innodb_idx_fld = index_info->fields;
innodb_idx_fld_end = index_info->fields + index_info->n_fields;
@@ -4546,6 +4783,7 @@ ha_innobase::innobase_initialize_autoinc()
auto_inc = innobase_next_autoinc(
read_auto_inc, 1, 1, 0, col_max_value);
+
break;
}
case DB_RECORD_NOT_FOUND:
@@ -4595,12 +4833,12 @@ ha_innobase::open(
uint test_if_locked) /*!< in: not used */
{
dict_table_t* ib_table;
- char norm_name[1000];
+ char norm_name[FN_REFLEN];
THD* thd;
ulint retries = 0;
char* is_part = NULL;
ibool par_case_name_set = FALSE;
- char par_case_name[MAX_FULL_NAME_LEN + 1];
+ char par_case_name[FN_REFLEN];
DBUG_ENTER("ha_innobase::open");
@@ -4642,7 +4880,31 @@ ha_innobase::open(
retry:
/* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_open_on_name(norm_name, FALSE);
+ ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE,
+ DICT_ERR_IGNORE_NONE);
+
+ if (ib_table
+ && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
+ && table->s->fields != dict_table_get_n_user_cols(ib_table))
+ || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
+ && (table->s->fields
+ != dict_table_get_n_user_cols(ib_table) - 1)))) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "table %s contains %lu user defined columns "
+ "in InnoDB, but %lu columns in MySQL. Please "
+ "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and "
+ REFMAN "innodb-troubleshooting.html "
+ "for how to resolve it",
+ norm_name, (ulong) dict_table_get_n_user_cols(ib_table),
+ (ulong) table->s->fields);
+
+ /* Mark this table as corrupted, so the drop table
+ or force recovery can still use it, but not others. */
+ ib_table->corrupted = true;
+ dict_table_close(ib_table, FALSE, FALSE);
+ ib_table = NULL;
+ is_part = NULL;
+ }
if (NULL == ib_table) {
if (is_part && retries < 10) {
@@ -4656,13 +4918,13 @@ retry:
1) If boot against an installation from Windows
platform, then its partition table name could
- be all be in lower case in system tables. So we
- will need to check lower case name when load table.
+ be in lower case in system tables. So we will
+ need to check lower case name when load table.
- 2) If we boot an installation from other case
+ 2) If we boot an installation from other case
sensitive platform in Windows, we might need to
- check the existence of table name without lowering
- case them in the system table. */
+ check the existence of table name without lower
+ case in the system table. */
if (innobase_get_lower_case_table_names() == 1) {
if (!par_case_name_set) {
@@ -4670,9 +4932,7 @@ retry:
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_name,
- strlen(norm_name));
- par_case_name[strlen(norm_name)] = 0;
+ strcpy(par_case_name, norm_name);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
@@ -4686,7 +4946,8 @@ retry:
}
ib_table = dict_table_open_on_name(
- par_case_name, FALSE);
+ par_case_name, FALSE, TRUE,
+ DICT_ERR_IGNORE_NONE);
}
if (!ib_table) {
@@ -4724,21 +4985,13 @@ retry:
retries);
}
- sql_print_error("Cannot find or open table %s from\n"
- "the internal data dictionary of InnoDB "
- "though the .frm file for the\n"
- "table exists. Maybe you have deleted and "
- "recreated InnoDB data\n"
- "files but have forgotten to delete the "
- "corresponding .frm files\n"
- "of InnoDB tables, or you have moved .frm "
- "files to another database?\n"
- "or, the table contains indexes that this "
- "version of the engine\n"
- "doesn't support.\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot open table %s from the internal data "
+ "dictionary of InnoDB though the .frm file "
+ "for the table exists. See "
+ REFMAN "innodb-troubleshooting.html for how "
+ "you can resolve the problem.", norm_name);
+
free_share(share);
my_errno = ENOENT;
@@ -4747,21 +5000,47 @@ retry:
table_opened:
+ innobase_copy_frm_flags_from_table_share(ib_table, table->s);
+
+ dict_stats_init(ib_table);
+
MONITOR_INC(MONITOR_TABLE_OPEN);
- if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
- sql_print_error("MySQL is trying to open a table handle but "
- "the .ibd file for\ntable %s does not exist.\n"
- "Have you deleted the .ibd file from the "
- "database directory under\nthe MySQL datadir, "
- "or have you used DISCARD TABLESPACE?\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
+ bool no_tablespace;
+
+ if (dict_table_is_discarded(ib_table)) {
+
+ ib_senderrf(thd,
+ IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ /* Allow an open because a proper DISCARD should have set
+ all the flags and index root page numbers to FIL_NULL that
+ should prevent any DML from running but it should allow DDL
+ operations. */
+
+ no_tablespace = false;
+
+ } else if (ib_table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN,
+ ER_TABLESPACE_MISSING, norm_name);
+
+ /* This means we have no idea what happened to the tablespace
+ file, best to play it safe. */
+
+ no_tablespace = true;
+ } else {
+ no_tablespace = false;
+ }
+
+ if (!thd_tablespace_op(thd) && no_tablespace) {
free_share(share);
my_errno = ENOENT;
- dict_table_close(ib_table, FALSE);
+ dict_table_close(ib_table, FALSE, FALSE);
+
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
@@ -4909,7 +5188,9 @@ table_opened:
}
/* Only if the table has an AUTOINC column. */
- if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
+ if (prebuilt->table != NULL
+ && !prebuilt->table->ibd_file_missing
+ && table->found_next_number_field != NULL) {
dict_table_autoinc_lock(prebuilt->table);
/* Since a table can already be "open" in InnoDB's internal
@@ -4930,6 +5211,31 @@ table_opened:
}
UNIV_INTERN
+handler*
+ha_innobase::clone(
+/*===============*/
+ const char* name, /*!< in: table name */
+ MEM_ROOT* mem_root) /*!< in: memory context */
+{
+ ha_innobase* new_handler;
+
+ DBUG_ENTER("ha_innobase::clone");
+
+ new_handler = static_cast<ha_innobase*>(handler::clone(name,
+ mem_root));
+ if (new_handler) {
+ DBUG_ASSERT(new_handler->prebuilt != NULL);
+ DBUG_ASSERT(new_handler->user_thd == user_thd);
+ DBUG_ASSERT(new_handler->prebuilt->trx == prebuilt->trx);
+
+ new_handler->prebuilt->select_lock_type
+ = prebuilt->select_lock_type;
+ }
+
+ DBUG_RETURN(new_handler);
+}
+
+UNIV_INTERN
uint
ha_innobase::max_supported_key_part_length() const
/*==============================================*/
@@ -4994,36 +5300,6 @@ get_field_offset(
return((uint) (field->ptr - table->record[0]));
}
-/**************************************************************//**
-Checks if a field in a record is SQL NULL. Uses the record format
-information in table to track the null bit in record.
-@return 1 if NULL, 0 otherwise */
-static inline
-uint
-field_in_record_is_null(
-/*====================*/
- TABLE* table, /*!< in: MySQL table object */
- Field* field, /*!< in: MySQL field object */
- char* record) /*!< in: a row in MySQL format */
-{
- int null_offset;
-
- if (!field->null_ptr) {
-
- return(0);
- }
-
- null_offset = (uint) ((char*) field->null_ptr
- - (char*) table->record[0]);
-
- if (record[null_offset] & field->null_bit) {
-
- return(1);
- }
-
- return(0);
-}
-
/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
@@ -5483,6 +5759,7 @@ get_innobase_type_from_mysql_type(
case HA_KEYTYPE_END:
ut_error;
}
+
return(0);
}
@@ -5512,7 +5789,7 @@ innobase_read_from_2_little_endian(
/*===============================*/
const uchar* buf) /*!< in: from where to read */
{
- return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])));
+ return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
}
/*******************************************************************//**
@@ -5530,7 +5807,8 @@ ha_innobase::store_key_val_for_row(
{
KEY* key_info = table->key_info + keynr;
KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end = key_part + key_info->key_parts;
+ KEY_PART_INFO* end =
+ key_part + key_info->user_defined_key_parts;
char* buff_start = buff;
enum_field_types mysql_type;
Field* field;
@@ -5906,10 +6184,9 @@ build_template_field(
templ->rec_field_no = dict_index_get_nth_col_pos(index, i);
}
- if (field->null_ptr) {
+ if (field->real_maybe_null()) {
templ->mysql_null_byte_offset =
- (ulint) ((char*) field->null_ptr
- - (char*) table->record[0]);
+ field->null_offset();
templ->mysql_null_bit_mask = (ulint) field->null_bit;
} else {
@@ -6011,6 +6288,10 @@ ha_innobase::build_template(
prebuilt->need_to_access_clustered = (index == clust_index);
+ /* Either prebuilt->index should be a secondary index, or it
+ should be the clustered index. */
+ ut_ad(dict_index_is_clust(index) == (index == clust_index));
+
/* Below we check column by column if we need to access
the clustered index. */
@@ -6227,11 +6508,13 @@ min value of the autoinc interval. Once that is fixed we can get rid of
the special lock handling.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
+
+ ut_ad(!srv_read_only_mode);
switch (innobase_autoinc_lock_mode) {
case AUTOINC_NO_LOCKING:
@@ -6276,19 +6559,19 @@ ha_innobase::innobase_lock_autoinc(void)
ut_error;
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
Reset the autoinc value in the table.
@return DB_SUCCESS if all went well else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_reset_autoinc(
/*================================*/
ulonglong autoinc) /*!< in: value to store */
{
- ulint error;
+ dberr_t error;
error = innobase_lock_autoinc();
@@ -6299,7 +6582,7 @@ ha_innobase::innobase_reset_autoinc(
dict_table_autoinc_unlock(prebuilt->table);
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
@@ -6307,12 +6590,12 @@ Store the autoinc value in the table. The autoinc value is only set if
it's greater than the existing autoinc value in the table.
@return DB_SUCCESS if all went well else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_set_max_autoinc(
/*==================================*/
ulonglong auto_inc) /*!< in: value to store */
{
- ulint error;
+ dberr_t error;
error = innobase_lock_autoinc();
@@ -6323,7 +6606,7 @@ ha_innobase::innobase_set_max_autoinc(
dict_table_autoinc_unlock(prebuilt->table);
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
@@ -6336,7 +6619,7 @@ ha_innobase::write_row(
/*===================*/
uchar* record) /*!< in: a row in MySQL format */
{
- ulint error = 0;
+ dberr_t error;
int error_result= 0;
ibool auto_inc_used= FALSE;
ulint sql_command;
@@ -6344,7 +6627,10 @@ ha_innobase::write_row(
DBUG_ENTER("ha_innobase::write_row");
- if (prebuilt->trx != trx) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (prebuilt->trx != trx) {
sql_print_error("The transaction object for the table handle "
"is at %p, but for the current thread it is at "
"%p",
@@ -6362,6 +6648,8 @@ ha_innobase::write_row(
++trx->will_lock;
}
+ ha_statistic_increment(&SSV::ha_write_count);
+
sql_command = thd_sql_command(user_thd);
if ((sql_command == SQLCOM_ALTER_TABLE
@@ -6441,7 +6729,7 @@ no_commit:
innobase_get_auto_increment(). */
prebuilt->autoinc_error = DB_SUCCESS;
- if ((error = update_auto_increment())) {
+ if ((error_result = update_auto_increment())) {
/* We don't want to mask autoinc overflow errors. */
/* Handle the case where the AUTOINC sub-system
@@ -6452,15 +6740,11 @@ no_commit:
my_error(ER_AUTOINC_READ_FAILED, MYF(0));
goto func_exit;
} else if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
+ error = prebuilt->autoinc_error;
goto report_error;
}
- /* MySQL errors are passed straight back. except for
- HA_ERR_AUTO_INC_READ_FAILED. This can only happen
- for values out of range.
- */
- error_result = (int) error;
+ /* MySQL errors are passed straight back. */
goto func_exit;
}
@@ -6479,10 +6763,10 @@ no_commit:
innobase_srv_conc_enter_innodb(prebuilt->trx);
error = row_insert_for_mysql((byte*) record, prebuilt);
+ DEBUG_SYNC(user_thd, "ib_after_row_insert");
/* Handle duplicate key errors */
if (auto_inc_used) {
- ulint err;
ulonglong auto_inc;
ulonglong col_max_value;
@@ -6544,6 +6828,7 @@ set_max_autoinc:
ulonglong offset;
ulonglong increment;
+ dberr_t err;
offset = prebuilt->autoinc_offset;
increment = prebuilt->autoinc_increment;
@@ -6562,13 +6847,22 @@ set_max_autoinc:
}
}
break;
+ default:
+ break;
}
}
innobase_srv_conc_exit_innodb(prebuilt->trx);
report_error:
- error_result = convert_error_code_to_mysql((int) error,
+ if (error == DB_TABLESPACE_DELETED) {
+ ib_senderrf(
+ trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+ }
+
+ error_result = convert_error_code_to_mysql(error,
prebuilt->table->flags,
user_thd);
@@ -6585,9 +6879,9 @@ func_exit:
/**********************************************************************//**
Checks which fields have changed in a row and stores information
of them to an update vector.
-@return error number or 0 */
+@return DB_SUCCESS or error code */
static
-int
+dberr_t
calc_row_difference(
/*================*/
upd_t* uvect, /*!< in/out: update vector */
@@ -6617,12 +6911,13 @@ calc_row_difference(
dfield_t dfield;
dict_index_t* clust_index;
uint i;
- ulint error = DB_SUCCESS;
ibool changes_fts_column = FALSE;
ibool changes_fts_doc_col = FALSE;
trx_t* trx = thd_to_trx(thd);
doc_id_t doc_id = FTS_NULL_DOC_ID;
+ ut_ad(!srv_read_only_mode);
+
n_fields = table->s->fields;
clust_index = dict_table_get_first_index(prebuilt->table);
@@ -6694,14 +6989,12 @@ calc_row_difference(
}
- if (field->null_ptr) {
- if (field_in_record_is_null(table, field,
- (char*) old_row)) {
+ if (field->real_maybe_null()) {
+ if (field->is_null_in_record(old_row)) {
o_len = UNIV_SQL_NULL;
}
- if (field_in_record_is_null(table, field,
- (char*) new_row)) {
+ if (field->is_null_in_record(new_row)) {
n_len = UNIV_SQL_NULL;
}
}
@@ -6838,13 +7131,7 @@ calc_row_difference(
fts_update_doc_id(
innodb_table, ufield, &trx->fts_next_doc_id);
- if (error == DB_SUCCESS) {
- ++n_changed;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%lu) while updating "
- "doc id in calc_row_difference().\n", error);
- }
+ ++n_changed;
} else {
/* We have a Doc ID column, but none of FTS indexed
columns are touched, nor the Doc ID column, so set
@@ -6858,7 +7145,7 @@ calc_row_difference(
ut_a(buf <= (byte*) original_upd_buff + buff_len);
- return(error);
+ return(DB_SUCCESS);
}
/**********************************************************************//**
@@ -6877,14 +7164,17 @@ ha_innobase::update_row(
uchar* new_row) /*!< in: new row in MySQL format */
{
upd_t* uvect;
- int error = 0;
+ dberr_t error;
trx_t* trx = thd_to_trx(user_thd);
DBUG_ENTER("ha_innobase::update_row");
ut_a(prebuilt->trx == trx);
- if (!trx_is_started(trx)) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (!trx_is_started(trx)) {
++trx->will_lock;
}
@@ -6905,6 +7195,8 @@ ha_innobase::update_row(
}
}
+ ha_statistic_increment(&SSV::ha_update_count);
+
if (prebuilt->upd_node) {
uvect = prebuilt->upd_node->update;
} else {
@@ -6972,18 +7264,18 @@ ha_innobase::update_row(
innobase_srv_conc_exit_innodb(trx);
func_exit:
- error = convert_error_code_to_mysql(error,
+ int err = convert_error_code_to_mysql(error,
prebuilt->table->flags, user_thd);
/* If success and no columns were updated. */
- if (error == 0 && uvect->n_fields == 0) {
+ if (err == 0 && uvect->n_fields == 0) {
/* This is the same as success, but instructs
MySQL that the row is not really updated and it
should not increase the count of updated rows.
This is fix for http://bugs.mysql.com/29157 */
- error = HA_ERR_RECORD_IS_THE_SAME;
- } else if (error == HA_FTS_INVALID_DOCID) {
+ err = HA_ERR_RECORD_IS_THE_SAME;
+ } else if (err == HA_FTS_INVALID_DOCID) {
my_error(HA_FTS_INVALID_DOCID, MYF(0));
}
@@ -6992,7 +7284,7 @@ func_exit:
innobase_active_small();
- DBUG_RETURN(error);
+ DBUG_RETURN(err);
}
/**********************************************************************//**
@@ -7004,17 +7296,22 @@ ha_innobase::delete_row(
/*====================*/
const uchar* record) /*!< in: a row in MySQL format */
{
- int error = 0;
+ dberr_t error;
trx_t* trx = thd_to_trx(user_thd);
DBUG_ENTER("ha_innobase::delete_row");
ut_a(prebuilt->trx == trx);
- if (!trx_is_started(trx)) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (!trx_is_started(trx)) {
++trx->will_lock;
}
+ ha_statistic_increment(&SSV::ha_delete_count);
+
if (!prebuilt->upd_node) {
row_get_prebuilt_update_vector(prebuilt);
}
@@ -7029,15 +7326,13 @@ ha_innobase::delete_row(
innobase_srv_conc_exit_innodb(trx);
- error = convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd);
-
/* Tell the InnoDB server that there might be work for
utility threads: */
innobase_active_small();
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(
+ error, prebuilt->table->flags, user_thd));
}
/**********************************************************************//**
@@ -7270,21 +7565,19 @@ ha_innobase::index_read(
dict_index_t* index;
ulint match_mode = 0;
int error;
- ulint ret;
+ dberr_t ret;
DBUG_ENTER("index_read");
DEBUG_SYNC_C("ha_innobase_index_read_begin");
ut_a(prebuilt->trx == thd_to_trx(user_thd));
+ ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
+
+ ha_statistic_increment(&SSV::ha_read_key_count);
index = prebuilt->index;
if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
- DBUG_PRINT("error", ("index: %p index_corrupt: %d data_corrupt: %d",
- index,
- index ? test(index->type & DICT_CORRUPT) : 0,
- (index && index->table ?
- test(index->table->corrupted) : 0)));
prebuilt->index_usable = FALSE;
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -7357,6 +7650,7 @@ ha_innobase::index_read(
case DB_SUCCESS:
error = 0;
table->status = 0;
+ srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
break;
case DB_RECORD_NOT_FOUND:
error = HA_ERR_KEY_NOT_FOUND;
@@ -7366,10 +7660,30 @@ ha_innobase::index_read(
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
break;
+ case DB_TABLESPACE_DELETED:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING, MYF(0),
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
default:
- error = convert_error_code_to_mysql((int) ret,
- prebuilt->table->flags,
- user_thd);
+ error = convert_error_code_to_mysql(
+ ret, prebuilt->table->flags, user_thd);
+
table->status = STATUS_NOT_FOUND;
break;
}
@@ -7571,8 +7885,8 @@ ha_innobase::general_fetch(
uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
ROW_SEL_EXACT_PREFIX */
{
- ulint ret;
- int error = 0;
+ dberr_t ret;
+ int error;
DBUG_ENTER("general_fetch");
@@ -7589,6 +7903,7 @@ ha_innobase::general_fetch(
case DB_SUCCESS:
error = 0;
table->status = 0;
+ srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
break;
case DB_RECORD_NOT_FOUND:
error = HA_ERR_END_OF_FILE;
@@ -7598,9 +7913,30 @@ ha_innobase::general_fetch(
error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
break;
+ case DB_TABLESPACE_DELETED:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
default:
error = convert_error_code_to_mysql(
- (int) ret, prebuilt->table->flags, user_thd);
+ ret, prebuilt->table->flags, user_thd);
+
table->status = STATUS_NOT_FOUND;
break;
}
@@ -7619,6 +7955,8 @@ ha_innobase::index_next(
uchar* buf) /*!< in/out: buffer for next row in MySQL
format */
{
+ ha_statistic_increment(&SSV::ha_read_next_count);
+
return(general_fetch(buf, ROW_SEL_NEXT, 0));
}
@@ -7633,6 +7971,8 @@ ha_innobase::index_next_same(
const uchar* key, /*!< in: key value */
uint keylen) /*!< in: key value length */
{
+ ha_statistic_increment(&SSV::ha_read_next_count);
+
return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}
@@ -7646,6 +7986,8 @@ ha_innobase::index_prev(
/*====================*/
uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
{
+ ha_statistic_increment(&SSV::ha_read_prev_count);
+
return(general_fetch(buf, ROW_SEL_PREV, 0));
}
@@ -7662,6 +8004,7 @@ ha_innobase::index_first(
int error;
DBUG_ENTER("index_first");
+ ha_statistic_increment(&SSV::ha_read_first_count);
error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
@@ -7687,6 +8030,7 @@ ha_innobase::index_last(
int error;
DBUG_ENTER("index_last");
+ ha_statistic_increment(&SSV::ha_read_last_count);
error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
@@ -7756,6 +8100,7 @@ ha_innobase::rnd_next(
int error;
DBUG_ENTER("rnd_next");
+ ha_statistic_increment(&SSV::ha_read_rnd_next_count);
if (start_of_scan) {
error = index_first(buf);
@@ -7789,6 +8134,8 @@ ha_innobase::rnd_pos(
DBUG_ENTER("rnd_pos");
DBUG_DUMP("key", pos, ref_length);
+ ha_statistic_increment(&SSV::ha_read_rnd_count);
+
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
/* Note that we assume the length of the row reference is fixed
@@ -7813,8 +8160,6 @@ ha_innobase::ft_init()
{
DBUG_ENTER("ft_init");
- fprintf(stderr, "ft_init()\n");
-
trx_t* trx = check_trx_exists(ha_thd());
/* FTS queries are not treated as autocommit non-locking selects.
@@ -7853,15 +8198,15 @@ ha_innobase::ft_init_ext(
ulint buf_tmp_used;
uint num_errors;
- fprintf(stderr, "ft_init_ext()\n");
-
- fprintf(stderr, "keynr=%u, '%.*s'\n",
- keynr, (int) key->length(), (byte*) key->ptr());
+ if (fts_enable_diag_print) {
+ fprintf(stderr, "keynr=%u, '%.*s'\n",
+ keynr, (int) key->length(), (byte*) key->ptr());
- if (flags & FT_BOOL) {
- fprintf(stderr, "BOOL search\n");
- } else {
- fprintf(stderr, "NL search\n");
+ if (flags & FT_BOOL) {
+ fprintf(stderr, "BOOL search\n");
+ } else {
+ fprintf(stderr, "NL search\n");
+ }
}
/* FIXME: utf32 and utf16 are not compatible with some
@@ -7908,7 +8253,7 @@ ha_innobase::ft_init_ext(
if (!index || index->type != DICT_FTS) {
my_error(ER_TABLE_HAS_NO_FT, MYF(0));
- return NULL;
+ return(NULL);
}
if (!(table->fts->fts_status & ADDED_TABLE_SYNCED)) {
@@ -7919,25 +8264,69 @@ ha_innobase::ft_init_ext(
error = fts_query(trx, index, flags, query, query_len, &result);
- prebuilt->result = result;
-
// FIXME: Proper error handling and diagnostic
if (error != DB_SUCCESS) {
fprintf(stderr, "Error processing query\n");
} else {
- /* Must return an instance of a result even if it's empty */
- ut_a(prebuilt->result);
-
/* Allocate FTS handler, and instantiate it before return */
fts_hdl = (NEW_FT_INFO*) my_malloc(sizeof(NEW_FT_INFO),
MYF(0));
fts_hdl->please = (struct _ft_vft*)(&ft_vft_result);
+ fts_hdl->could_you = (struct _ft_vft_ext*)(&ft_vft_ext_result);
fts_hdl->ft_prebuilt = prebuilt;
fts_hdl->ft_result = result;
+
+ /* FIXME: Re-evaluate the condition when Bug 14469540
+ is resolved */
+ prebuilt->in_fts_query = true;
}
- return ((FT_INFO*) fts_hdl);
+ return((FT_INFO*) fts_hdl);
+}
+
+/*****************************************************************//**
+Set up search tuple for a query through FTS_DOC_ID_INDEX on
+supplied Doc ID. This is used by MySQL to retrieve the documents
+once the search result (Doc IDs) is available */
+static
+void
+innobase_fts_create_doc_id_key(
+/*===========================*/
+ dtuple_t* tuple, /* in/out: prebuilt->search_tuple */
+ const dict_index_t*
+ index, /* in: index (FTS_DOC_ID_INDEX) */
+ doc_id_t* doc_id) /* in/out: doc id to search, value
+ could be changed to storage format
+ used for search. */
+{
+ doc_id_t temp_doc_id;
+ dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
+
+ ut_a(dict_index_get_n_unique(index) == 1);
+
+ dtuple_set_n_fields(tuple, index->n_fields);
+ dict_index_copy_types(tuple, index, index->n_fields);
+
+#ifdef UNIV_DEBUG
+ /* The unique Doc ID field should be an eight-bytes integer */
+ dict_field_t* field = dict_index_get_nth_field(index, 0);
+ ut_a(field->col->mtype == DATA_INT);
+ ut_ad(sizeof(*doc_id) == field->fixed_len);
+ ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0);
+#endif /* UNIV_DEBUG */
+
+ /* Convert to storage byte order */
+ mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
+ *doc_id = temp_doc_id;
+ dfield_set_data(dfield, doc_id, sizeof(*doc_id));
+
+ dtuple_set_n_fields_cmp(tuple, 1);
+
+ for (ulint i = 1; i < index->n_fields; i++) {
+ dfield = dtuple_get_nth_field(tuple, i);
+ dfield_set_null(dfield);
+ }
}
/**********************************************************************//**
@@ -7984,6 +8373,14 @@ next_record:
if (result->current != NULL) {
dict_index_t* index;
dtuple_t* tuple = prebuilt->search_tuple;
+ doc_id_t search_doc_id;
+
+ /* If we only need information from result we can return
+ without fetching the table row */
+ if (ft_prebuilt->read_just_key) {
+ table->status= 0;
+ return(0);
+ }
index = dict_table_get_index_on_name(
prebuilt->table, FTS_DOC_ID_INDEX_NAME);
@@ -7997,48 +8394,74 @@ next_record:
fts_ranking_t* ranking = rbt_value(
fts_ranking_t, result->current);
- /* We pass a pointer to the doc_id because we need to
- convert it to storage byte order. */
- row_create_key(tuple, index, &ranking->doc_id);
+ search_doc_id = ranking->doc_id;
+
+ /* We pass a pointer of search_doc_id because it will be
+ converted to storage byte order used in the search
+ tuple. */
+ innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
innobase_srv_conc_enter_innodb(prebuilt->trx);
- ulint ret = row_search_for_mysql(
+ dberr_t ret = row_search_for_mysql(
(byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0);
innobase_srv_conc_exit_innodb(prebuilt->trx);
-
- if (ret == DB_SUCCESS) {
+ switch (ret) {
+ case DB_SUCCESS:
error = 0;
table->status = 0;
-
- } else if (ret == DB_RECORD_NOT_FOUND) {
-
+ break;
+ case DB_RECORD_NOT_FOUND:
result->current = const_cast<ib_rbt_node_t*>(
rbt_next(result->rankings_by_rank,
result->current));
if (!result->current) {
- error = HA_ERR_KEY_NOT_FOUND;
+ /* exhaust the result set, should return
+ HA_ERR_END_OF_FILE just like
+ ha_innobase::general_fetch() and/or
+ ha_innobase::index_first() etc. */
+ error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
} else {
goto next_record;
}
+ break;
+ case DB_END_OF_INDEX:
+ error = HA_ERR_END_OF_FILE;
+ table->status = STATUS_NOT_FOUND;
+ break;
+ case DB_TABLESPACE_DELETED:
- } else if (ret == DB_END_OF_INDEX) {
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
- error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
- } else {
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ default:
error = convert_error_code_to_mysql(
- (int) ret, 0, user_thd);
+ ret, 0, user_thd);
table->status = STATUS_NOT_FOUND;
+ break;
}
- return (error);
+ return(error);
}
return(HA_ERR_END_OF_FILE);
@@ -8052,11 +8475,6 @@ ha_innobase::ft_end()
{
fprintf(stderr, "ft_end()\n");
- if (prebuilt->result != NULL) {
- fts_query_free_result(prebuilt->result);
- prebuilt->result = NULL;
- }
-
rnd_end();
}
@@ -8110,23 +8528,21 @@ See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
/*****************************************************************//**
Check whether there exist a column named as "FTS_DOC_ID", which is
reserved for InnoDB FTS Doc ID
-@return TRUE if there exist a "FTS_DOC_ID" column */
+@return true if there exists a "FTS_DOC_ID" column */
static
-ibool
+bool
create_table_check_doc_id_col(
/*==========================*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
ulint* doc_id_col) /*!< out: Doc ID column number if
- there exist a FTS_DOC_ID column, ULINT_UNDEFINED if column is of the
+ there exist a FTS_DOC_ID column,
+ ULINT_UNDEFINED if column is of the
wrong type/name/size */
{
- ibool find_doc_id = FALSE;
- ulint i;
-
- for (i = 0; i < form->s->fields; i++) {
- Field* field;
+ for (ulint i = 0; i < form->s->fields; i++) {
+ const Field* field;
ulint col_type;
ulint col_len;
ulint unsigned_type;
@@ -8141,21 +8557,19 @@ create_table_check_doc_id_col(
if (innobase_strcasecmp(field->field_name,
FTS_DOC_ID_COL_NAME) == 0) {
- find_doc_id = TRUE;
-
/* Note the name is case sensitive due to
our internal query parser */
if (col_type == DATA_INT
- && !field->null_ptr
+ && !field->real_maybe_null()
&& col_len == sizeof(doc_id_t)
&& (strcmp(field->field_name,
FTS_DOC_ID_COL_NAME) == 0)) {
*doc_id_col = i;
} else {
push_warning_printf(
- (THD*) trx->mysql_thd,
+ trx->mysql_thd,
Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: FTS_DOC_ID column must be "
"of BIGINT NOT NULL type, and named "
"in all capitalized characters");
@@ -8164,38 +8578,39 @@ create_table_check_doc_id_col(
*doc_id_col = ULINT_UNDEFINED;
}
- break;
+ return(true);
}
}
- return(find_doc_id);
+ return(false);
}
/*****************************************************************//**
Creates a table definition to an InnoDB database. */
-static
+static __attribute__((nonnull, warn_unused_result))
int
create_table_def(
/*=============*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
const char* table_name, /*!< in: table name */
- const char* path_of_temp_table,/*!< in: if this is a table explicitly
+ const char* temp_path, /*!< in: if this is a table explicitly
created by the user with the
TEMPORARY keyword, then this
parameter is the dir path where the
table should be placed if we create
an .ibd file for it (no .ibd extension
- in the path, though); otherwise this
- is NULL */
+ in the path, though). Otherwise this
+ is a zero-length string */
+ const char* remote_path, /*!< in: Remote path or zero-length string */
ulint flags, /*!< in: table flags */
ulint flags2) /*!< in: table flags2 */
{
- Field* field;
+ THD* thd = trx->mysql_thd;
dict_table_t* table;
ulint n_cols;
- int error;
+ dberr_t err;
ulint col_type;
ulint col_len;
ulint nulls_allowed;
@@ -8206,17 +8621,18 @@ create_table_def(
ulint i;
ulint doc_id_col = 0;
ibool has_doc_id_col = FALSE;
+ mem_heap_t* heap;
DBUG_ENTER("create_table_def");
DBUG_PRINT("enter", ("table_name: %s", table_name));
- ut_a(trx->mysql_thd != NULL);
+ DBUG_ASSERT(thd != NULL);
/* MySQL does the name length check. But we do additional check
on the name length here */
if (strlen(table_name) > MAX_FULL_NAME_LEN) {
push_warning_printf(
- (THD*) trx->mysql_thd, Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_TABLE_NAME,
"InnoDB: Table Name or Database Name is too long");
@@ -8228,7 +8644,7 @@ create_table_def(
if (strcmp(strchr(table_name, '/') + 1,
"innodb_table_monitor") == 0) {
push_warning(
- (THD*) trx->mysql_thd, Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_WRONG_COMMAND,
DEPRECATED_MSG_INNODB_TABLE_MONITOR);
}
@@ -8242,7 +8658,7 @@ create_table_def(
if (doc_id_col == ULINT_UNDEFINED) {
trx_commit_for_mysql(trx);
- error = DB_ERROR;
+ err = DB_ERROR;
goto error_ret;
} else {
has_doc_id_col = TRUE;
@@ -8270,42 +8686,41 @@ create_table_def(
flags, flags2);
}
- if (path_of_temp_table) {
+ if (flags2 & DICT_TF2_TEMPORARY) {
+ ut_a(strlen(temp_path));
table->dir_path_of_temp_table =
- mem_heap_strdup(table->heap, path_of_temp_table);
+ mem_heap_strdup(table->heap, temp_path);
+ }
+
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ ut_a(strlen(remote_path));
+ table->data_dir_path = mem_heap_strdup(table->heap, remote_path);
+ } else {
+ table->data_dir_path = NULL;
}
+ heap = mem_heap_create(1000);
for (i = 0; i < n_cols; i++) {
- field = form->field[i];
+ Field* field = form->field[i];
col_type = get_innobase_type_from_mysql_type(&unsigned_type,
field);
if (!col_type) {
push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"Error creating table '%s' with "
"column '%s'. Please check its "
"column type and try to re-create "
"the table with an appropriate "
"column type.",
- table->name, (char*) field->field_name);
+ table->name, field->field_name);
goto err_col;
}
- if (field->null_ptr) {
- nulls_allowed = 0;
- } else {
- nulls_allowed = DATA_NOT_NULL;
- }
-
- if (field->binary()) {
- binary_type = DATA_BINARY_TYPE;
- } else {
- binary_type = 0;
- }
+ nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
+ binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
charset_no = 0;
@@ -8317,13 +8732,13 @@ create_table_def(
/* in data0type.h we assume that the
number fits in one byte in prtype */
push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"In InnoDB, charset-collation codes"
" must be below 256."
" Unsupported code %lu.",
(ulong) charset_no);
+ mem_heap_free(heap);
DBUG_RETURN(ER_CANT_CREATE_TABLE);
}
}
@@ -8355,14 +8770,15 @@ create_table_def(
field->field_name);
err_col:
dict_mem_table_free(table);
+ mem_heap_free(heap);
trx_commit_for_mysql(trx);
- error = DB_ERROR;
+ err = DB_ERROR;
goto error_ret;
}
- dict_mem_table_add_col(table, table->heap,
- (char*) field->field_name,
+ dict_mem_table_add_col(table, heap,
+ field->field_name,
col_type,
dtype_form_prtype(
(ulint) field->type()
@@ -8374,25 +8790,33 @@ err_col:
/* Add the FTS doc_id hidden column. */
if (flags2 & DICT_TF2_FTS && !has_doc_id_col) {
- fts_add_doc_id_column(table);
+ fts_add_doc_id_column(table, heap);
}
- error = row_create_table_for_mysql(table, trx);
+ err = row_create_table_for_mysql(table, trx, false);
- if (error == DB_DUPLICATE_KEY) {
- char buf[100];
+ mem_heap_free(heap);
+
+ if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) {
+ char display_name[FN_REFLEN];
char* buf_end = innobase_convert_identifier(
- buf, sizeof buf - 1, table_name, strlen(table_name),
- trx->mysql_thd, TRUE);
+ display_name, sizeof(display_name) - 1,
+ table_name, strlen(table_name),
+ thd, TRUE);
*buf_end = '\0';
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
+
+ my_error(err == DB_DUPLICATE_KEY
+ ? ER_TABLE_EXISTS_ERROR
+ : ER_TABLESPACE_EXISTS, MYF(0), display_name);
}
-error_ret:
- error = convert_error_code_to_mysql(error, flags, NULL);
+ if (err == DB_SUCCESS && (flags2 & DICT_TF2_FTS)) {
+ fts_optimize_add_table(table);
+ }
- DBUG_RETURN(error);
+error_ret:
+ DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd));
}
/*****************************************************************//**
@@ -8402,108 +8826,113 @@ int
create_index(
/*=========*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
ulint flags, /*!< in: InnoDB table flags */
const char* table_name, /*!< in: table name */
uint key_num) /*!< in: index number */
{
- Field* field;
dict_index_t* index;
int error;
- ulint n_fields;
- KEY* key;
- KEY_PART_INFO* key_part;
+ const KEY* key;
ulint ind_type;
- ulint col_type;
- ulint prefix_len = 0;
- ulint is_unsigned;
- ulint i;
- ulint j;
- ulint* field_lengths = NULL;
+ ulint* field_lengths;
DBUG_ENTER("create_index");
key = form->key_info + key_num;
- n_fields = key->key_parts;
-
/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
- ind_type = 0;
-
if (key->flags & HA_FULLTEXT) {
- ind_type = DICT_FTS;
- } else {
- if (key_num == form->s->primary_key) {
- ind_type = ind_type | DICT_CLUSTERED;
+ index = dict_mem_index_create(table_name, key->name, 0,
+ DICT_FTS,
+ key->user_defined_key_parts);
+
+ for (ulint i = 0; i < key->user_defined_key_parts; i++) {
+ KEY_PART_INFO* key_part = key->key_part + i;
+ dict_mem_index_add_field(
+ index, key_part->field->field_name, 0);
}
- if (key->flags & HA_NOSAME ) {
- ind_type = ind_type | DICT_UNIQUE;
- }
- }
+ DBUG_RETURN(convert_error_code_to_mysql(
+ row_create_index_for_mysql(
+ index, trx, NULL),
+ flags, NULL));
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
+ }
- index = dict_mem_index_create(table_name, key->name, 0,
- ind_type, n_fields);
+ ind_type = 0;
- if (ind_type != DICT_FTS) {
- field_lengths = (ulint*) my_malloc(
- sizeof(ulint) * n_fields, MYF(MY_FAE));
+ if (key_num == form->s->primary_key) {
+ ind_type |= DICT_CLUSTERED;
+ }
- ut_ad(!(index->type & DICT_FTS));
+ if (key->flags & HA_NOSAME) {
+ ind_type |= DICT_UNIQUE;
}
- for (i = 0; i < n_fields; i++) {
- key_part = key->key_part + i;
+ field_lengths = (ulint*) my_malloc(
+ key->user_defined_key_parts * sizeof *
+ field_lengths, MYF(MY_FAE));
- if (ind_type != DICT_FTS) {
+ /* We pass 0 as the space id, and determine at a lower level the space
+ id where to store the table */
- /* (The flag HA_PART_KEY_SEG denotes in MySQL a
- column prefix field in an index: we only store a
- specified number of first bytes of the column to
- the index field.) The flag does not seem to be
- properly set by MySQL. Let us fall back on testing
- the length of the key part versus the column. */
+ index = dict_mem_index_create(table_name, key->name, 0,
+ ind_type, key->user_defined_key_parts);
- field = NULL;
+ for (ulint i = 0; i < key->user_defined_key_parts; i++) {
+ KEY_PART_INFO* key_part = key->key_part + i;
+ ulint prefix_len;
+ ulint col_type;
+ ulint is_unsigned;
- for (j = 0; j < form->s->fields; j++) {
- field = form->field[j];
+ /* (The flag HA_PART_KEY_SEG denotes in MySQL a
+ column prefix field in an index: we only store a
+ specified number of first bytes of the column to
+ the index field.) The flag does not seem to be
+ properly set by MySQL. Let us fall back on testing
+ the length of the key part versus the column. */
- if (0 == innobase_strcasecmp(
- field->field_name,
- key_part->field->field_name)) {
- /* Found the corresponding column */
+ Field* field = NULL;
- break;
- }
- }
+ for (ulint j = 0; j < form->s->fields; j++) {
- ut_a(j < form->s->fields);
+ field = form->field[j];
- col_type = get_innobase_type_from_mysql_type(
- &is_unsigned, key_part->field);
+ if (0 == innobase_strcasecmp(
+ field->field_name,
+ key_part->field->field_name)) {
+ /* Found the corresponding column */
- if (DATA_BLOB == col_type
- || (key_part->length < field->pack_length()
- && field->type() != MYSQL_TYPE_VARCHAR)
- || (field->type() == MYSQL_TYPE_VARCHAR
- && key_part->length < field->pack_length()
- - ((Field_varstring*) field)->length_bytes)) {
+ goto found;
+ }
+ }
+ ut_error;
+found:
+ col_type = get_innobase_type_from_mysql_type(
+ &is_unsigned, key_part->field);
+
+ if (DATA_BLOB == col_type
+ || (key_part->length < field->pack_length()
+ && field->type() != MYSQL_TYPE_VARCHAR)
+ || (field->type() == MYSQL_TYPE_VARCHAR
+ && key_part->length < field->pack_length()
+ - ((Field_varstring*) field)->length_bytes)) {
+
+ switch (col_type) {
+ default:
prefix_len = key_part->length;
-
- if (col_type == DATA_INT
- || col_type == DATA_FLOAT
- || col_type == DATA_DOUBLE
- || col_type == DATA_DECIMAL) {
- sql_print_error(
+ break;
+ case DATA_INT:
+ case DATA_FLOAT:
+ case DATA_DOUBLE:
+ case DATA_DECIMAL:
+ sql_print_error(
"MySQL is trying to create a column "
"prefix index field, on an "
"inappropriate data type. Table "
@@ -8511,17 +8940,16 @@ create_index(
table_name,
key_part->field->field_name);
- prefix_len = 0;
- }
- } else {
prefix_len = 0;
}
-
- field_lengths[i] = key_part->length;
+ } else {
+ prefix_len = 0;
}
- dict_mem_index_add_field(index,
- (char*) key_part->field->field_name, prefix_len);
+ field_lengths[i] = key_part->length;
+
+ dict_mem_index_add_field(
+ index, key_part->field->field_name, prefix_len);
}
ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
@@ -8529,9 +8957,10 @@ create_index(
/* Even though we've defined max_supported_key_part_length, we
still do our own checking using field_lengths to be absolutely
sure we don't create too long indexes. */
- error = row_create_index_for_mysql(index, trx, field_lengths);
- error = convert_error_code_to_mysql(error, flags, NULL);
+ error = convert_error_code_to_mysql(
+ row_create_index_for_mysql(index, trx, field_lengths),
+ flags, NULL);
my_free(field_lengths);
@@ -8550,7 +8979,7 @@ create_clustered_index_when_no_primary(
const char* table_name) /*!< in: table name */
{
dict_index_t* index;
- int error;
+ dberr_t error;
/* We pass 0 as the space id, and determine at a lower level the space
id where to store the table */
@@ -8560,9 +8989,7 @@ create_clustered_index_when_no_primary(
error = row_create_index_for_mysql(index, trx, NULL);
- error = convert_error_code_to_mysql(error, flags, NULL);
-
- return(error);
+ return(convert_error_code_to_mysql(error, flags, NULL));
}
/*****************************************************************//**
@@ -8599,11 +9026,11 @@ get_row_format_name(
if (!use_tablespace) { \
push_warning_printf( \
thd, Sql_condition::WARN_LEVEL_WARN, \
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: ROW_FORMAT=%s requires" \
" innodb_file_per_table.", \
get_row_format_name(row_format)); \
- ret = FALSE; \
+ ret = "ROW_FORMAT"; \
}
/** If file-format is Antelope, issue warning and set ret false */
@@ -8611,11 +9038,11 @@ get_row_format_name(
if (srv_file_format < UNIV_FORMAT_B) { \
push_warning_printf( \
thd, Sql_condition::WARN_LEVEL_WARN, \
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: ROW_FORMAT=%s requires" \
" innodb_file_format > Antelope.", \
get_row_format_name(row_format)); \
- ret = FALSE; \
+ ret = "ROW_FORMAT"; \
}
@@ -8624,11 +9051,11 @@ Validates the create options. We may build on this function
in future. For now, it checks two specifiers:
KEY_BLOCK_SIZE and ROW_FORMAT
If innodb_strict_mode is not set then this function is a no-op
-@return TRUE if valid. */
-static
-ibool
-create_options_are_valid(
-/*=====================*/
+@return NULL if valid, string if not. */
+UNIV_INTERN
+const char*
+create_options_are_invalid(
+/*=======================*/
THD* thd, /*!< in: connection thread. */
TABLE* form, /*!< in: information on table
columns and indexes */
@@ -8636,14 +9063,14 @@ create_options_are_valid(
bool use_tablespace) /*!< in: srv_file_per_table */
{
ibool kbs_specified = FALSE;
- ibool ret = TRUE;
+ const char* ret = NULL;
enum row_type row_format = form->s->row_type;
ut_ad(thd != NULL);
/* If innodb_strict_mode is not set don't do any validation. */
if (!(THDVAR(thd, strict_mode))) {
- return(TRUE);
+ return(NULL);
}
ut_ad(form != NULL);
@@ -8663,18 +9090,18 @@ create_options_are_valid(
if (!use_tablespace) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
if (srv_file_format < UNIV_FORMAT_B) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
/* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if
@@ -8686,22 +9113,22 @@ create_options_are_valid(
if (create_info->key_block_size > kbs_max) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE=%ld"
" cannot be larger than %ld.",
create_info->key_block_size,
kbs_max);
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
break;
default:
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: invalid KEY_BLOCK_SIZE = %lu."
" Valid values are [1, 2, 4, 8, 16]",
create_info->key_block_size);
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
break;
}
}
@@ -8722,11 +9149,11 @@ create_options_are_valid(
if (kbs_specified) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: cannot specify ROW_FORMAT = %s"
" with KEY_BLOCK_SIZE.",
get_row_format_name(row_format));
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
break;
case ROW_TYPE_DEFAULT:
@@ -8736,12 +9163,42 @@ create_options_are_valid(
case ROW_TYPE_NOT_USED:
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: invalid ROW_FORMAT specifier.");
- ret = FALSE;
+ ret = "ROW_TYPE";
break;
}
+ /* Use DATA DIRECTORY only with file-per-table. */
+ if (create_info->data_file_name && !use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY requires"
+ " innodb_file_per_table.");
+ ret = "DATA DIRECTORY";
+ }
+
+ /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
+ if (create_info->data_file_name
+ && create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY cannot be used"
+ " for TEMPORARY tables.");
+ ret = "DATA DIRECTORY";
+ }
+
+ /* Do not allow INDEX_DIRECTORY */
+ if (create_info->index_file_name) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: INDEX DIRECTORY is not supported");
+ ret = "INDEX DIRECTORY";
+ }
+
return(ret);
}
@@ -8757,11 +9214,18 @@ ha_innobase::update_create_info(
ha_innobase::info(HA_STATUS_AUTO);
create_info->auto_increment_value = stats.auto_increment_value;
}
+
+ /* Update the DATA DIRECTORY name from SYS_DATAFILES. */
+ dict_get_and_save_data_dir_path(prebuilt->table, false);
+
+ if (prebuilt->table->data_dir_path) {
+ create_info->data_file_name = prebuilt->table->data_dir_path;
+ }
}
/*****************************************************************//**
Initialize the table FTS stopword list
-@TRUE if succeed */
+@return TRUE if success */
UNIV_INTERN
ibool
innobase_fts_load_stopword(
@@ -8770,68 +9234,38 @@ innobase_fts_load_stopword(
trx_t* trx, /*!< in: transaction */
THD* thd) /*!< in: current thread */
{
- return (fts_load_stopword(table, trx,
- fts_server_stopword_table,
- THDVAR(thd, ft_user_stopword_table),
- THDVAR(thd, ft_enable_stopword), FALSE));
+ return(fts_load_stopword(table, trx,
+ fts_server_stopword_table,
+ THDVAR(thd, ft_user_stopword_table),
+ THDVAR(thd, ft_enable_stopword), FALSE));
}
+
/*****************************************************************//**
-Creates a new table to an InnoDB database.
-@return error number */
+Parses the table name into normal name and either temp path or remote path
+if needed.
+@return 0 if successful, otherwise, error number */
UNIV_INTERN
int
-ha_innobase::create(
-/*================*/
- const char* name, /*!< in: table name */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info) /*!< in: more information of the
+ha_innobase::parse_table_name(
+/*==========================*/
+ const char* name, /*!< in/out: table name provided*/
+ HA_CREATE_INFO* create_info, /*!< in: more information of the
created table, contains also the
create statement string */
+ ulint flags, /*!< in: flags*/
+ ulint flags2, /*!< in: flags2*/
+ char* norm_name, /*!< out: normalized table name */
+ char* temp_path, /*!< out: absolute path of table */
+ char* remote_path) /*!< out: remote path of table */
{
- int error;
- trx_t* parent_trx;
- trx_t* trx;
- int primary_key_no;
- uint i;
- char name2[FN_REFLEN];
- char norm_name[FN_REFLEN];
THD* thd = ha_thd();
- ib_int64_t auto_inc_value;
- ulint fts_indexes = 0;
- ibool zip_allowed = TRUE;
- enum row_type row_format;
- rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
-
- /* Cache the global variable "srv_file_per_table" to a local
- variable before using it. Note that "srv_file_per_table"
- is not under dict_sys mutex protection, and could be changed
- while creating the table. So we read the current value here
- and make all further decisions based on this. */
- bool use_tablespace = srv_file_per_table;
-
- /* Zip Shift Size - log2 - 9 of compressed page size,
- zero for uncompressed */
- ulint zip_ssize = 0;
- ulint flags = 0;
- ulint flags2 = 0;
- dict_table_t* innobase_table = NULL;
-
- /* Cache the value of innodb_file_format, in case it is
- modified by another thread while the table is being created. */
- const ulint file_format_allowed = srv_file_format;
- const char* stmt;
- size_t stmt_len;
-
- DBUG_ENTER("ha_innobase::create");
-
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(create_info != NULL);
+ bool use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE;
+ DBUG_ENTER("ha_innobase::parse_table_name");
#ifdef __WIN__
/* Names passed in from server are in two formats:
1. <database_name>/<table_name>: for normal table creation
- 2. full path: for temp table creation, or sym link
+ 2. full path: for temp table creation, or DATA DIRECTORY.
When srv_file_per_table is on and mysqld_embedded is off,
check for full path pattern, i.e.
@@ -8842,7 +9276,7 @@ ha_innobase::create(
if (use_tablespace
&& !mysqld_embedded
- && (!create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
+ && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
if ((name[1] == ':')
|| (name[0] == '\\' && name[1] == '\\')) {
@@ -8852,26 +9286,113 @@ ha_innobase::create(
}
#endif
- if (form->s->fields > 1000) {
- /* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
- but we play safe here */
+ normalize_table_name(norm_name, name);
+ temp_path[0] = '\0';
+ remote_path[0] = '\0';
- DBUG_RETURN(HA_ERR_TO_BIG_ROW);
+ /* A full path is used for TEMPORARY TABLE and DATA DIRECTORY.
+ In the case of;
+ CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
+ We ignore the DATA DIRECTORY. */
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ strncpy(temp_path, name, FN_REFLEN - 1);
}
+ if (create_info->data_file_name) {
+ bool ignore = false;
+
+ /* Use DATA DIRECTORY only with file-per-table. */
+ if (!use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY requires"
+ " innodb_file_per_table.");
+ ignore = true;
+ }
+
+ /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY cannot be"
+ " used for TEMPORARY tables.");
+ ignore = true;
+ }
+
+ if (ignore) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ WARN_OPTION_IGNORED,
+ ER_DEFAULT(WARN_OPTION_IGNORED),
+ "DATA DIRECTORY");
+ } else {
+ strncpy(remote_path, create_info->data_file_name,
+ FN_REFLEN - 1);
+ }
+ }
+
+ if (create_info->index_file_name) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ WARN_OPTION_IGNORED,
+ ER_DEFAULT(WARN_OPTION_IGNORED),
+ "INDEX DIRECTORY");
+ }
+
+ DBUG_RETURN(0);
+}
+
+/*****************************************************************//**
+Determines InnoDB table flags.
+@retval true if successful, false if error */
+UNIV_INTERN
+bool
+innobase_table_flags(
+/*=================*/
+ const TABLE* form, /*!< in: table */
+ const HA_CREATE_INFO* create_info, /*!< in: information
+ on table columns and indexes */
+ THD* thd, /*!< in: connection */
+ bool use_tablespace, /*!< in: whether to create
+ outside system tablespace */
+ ulint* flags, /*!< out: DICT_TF flags */
+ ulint* flags2) /*!< out: DICT_TF2 flags */
+{
+ DBUG_ENTER("innobase_table_flags");
+
+ const char* fts_doc_id_index_bad = NULL;
+ bool zip_allowed = true;
+ ulint zip_ssize = 0;
+ enum row_type row_format;
+ rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
+ bool use_data_dir;
+
+ /* Cache the value of innodb_file_format, in case it is
+ modified by another thread while the table is being created. */
+ const ulint file_format_allowed = srv_file_format;
+
+ *flags = 0;
+ *flags2 = 0;
+
/* Check if there are any FTS indexes defined on this table. */
- for (i = 0; i < form->s->keys; i++) {
- KEY* key = form->key_info + i;
+ for (uint i = 0; i < form->s->keys; i++) {
+ const KEY* key = &form->key_info[i];
if (key->flags & HA_FULLTEXT) {
- ++fts_indexes;
+ *flags2 |= DICT_TF2_FTS;
/* We don't support FTS indexes in temporary
tables. */
if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
- DBUG_RETURN(-1);
+ DBUG_RETURN(false);
+ }
+
+ if (fts_doc_id_index_bad) {
+ goto index_bad;
}
}
@@ -8884,41 +9405,15 @@ ha_innobase::create(
|| strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
|| strcmp(key->key_part[0].field->field_name,
FTS_DOC_ID_COL_NAME)) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- " InnoDB: Index name %s is reserved"
- " for the unique index on"
- " FTS_DOC_ID column for FTS"
- " document ID indexing"
- " on table %s. Please check"
- " the index definition to"
- " make sure it is of correct"
- " type\n",
- FTS_DOC_ID_INDEX_NAME,
- name);
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- FTS_DOC_ID_INDEX_NAME);
- DBUG_RETURN(-1);
+ fts_doc_id_index_bad = key->name;
}
- }
-
- strcpy(name2, name);
-
- normalize_table_name(norm_name, name2);
- /* Create the table definition in InnoDB */
-
- flags = 0;
-
- if (fts_indexes > 0) {
- flags2 = DICT_TF2_FTS;
- }
-
- /* Validate create options if innodb_strict_mode is set. */
- if (!create_options_are_valid(
- thd, form, create_info, use_tablespace)) {
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) {
+index_bad:
+ my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
+ fts_doc_id_index_bad);
+ DBUG_RETURN(false);
+ }
}
if (create_info->key_block_size) {
@@ -8942,7 +9437,7 @@ ha_innobase::create(
if (!use_tablespace) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
zip_allowed = FALSE;
@@ -8951,7 +9446,7 @@ ha_innobase::create(
if (file_format_allowed < UNIV_FORMAT_B) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
zip_allowed = FALSE;
@@ -8962,7 +9457,7 @@ ha_innobase::create(
PAGE_ZIP_SSIZE_MAX)) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ignoring KEY_BLOCK_SIZE=%lu.",
create_info->key_block_size);
}
@@ -8984,7 +9479,7 @@ ha_innobase::create(
with ALTER TABLE anyway. */
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
" unless ROW_FORMAT=COMPRESSED.",
create_info->key_block_size);
@@ -9012,14 +9507,14 @@ ha_innobase::create(
if (!use_tablespace) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=%s requires"
" innodb_file_per_table.",
get_row_format_name(row_format));
} else if (file_format_allowed == UNIV_FORMAT_A) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=%s requires"
" innodb_file_format > Antelope.",
get_row_format_name(row_format));
@@ -9036,7 +9531,7 @@ ha_innobase::create(
case ROW_TYPE_PAGE:
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: assuming ROW_FORMAT=COMPACT.");
case ROW_TYPE_DEFAULT:
/* If we fell through, set row format to Compact. */
@@ -9049,12 +9544,100 @@ ha_innobase::create(
if (!zip_allowed) {
zip_ssize = 0;
}
- dict_tf_set(&flags, innodb_row_format, zip_ssize);
+
+ use_data_dir = use_tablespace
+ && ((create_info->data_file_name != NULL)
+ && !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
+
+ dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir);
+
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ *flags2 |= DICT_TF2_TEMPORARY;
+ }
+
+ if (use_tablespace) {
+ *flags2 |= DICT_TF2_USE_TABLESPACE;
+ }
+
+ DBUG_RETURN(true);
+}
+
+/*****************************************************************//**
+Creates a new table to an InnoDB database.
+@return error number */
+UNIV_INTERN
+int
+ha_innobase::create(
+/*================*/
+ const char* name, /*!< in: table name */
+ TABLE* form, /*!< in: information on table
+ columns and indexes */
+ HA_CREATE_INFO* create_info) /*!< in: more information of the
+ created table, contains also the
+ create statement string */
+{
+ int error;
+ trx_t* parent_trx;
+ trx_t* trx;
+ int primary_key_no;
+ uint i;
+ char norm_name[FN_REFLEN]; /* {database}/{tablename} */
+ char temp_path[FN_REFLEN]; /* absolute path of temp frm */
+ char remote_path[FN_REFLEN]; /* absolute path of table */
+ THD* thd = ha_thd();
+ ib_int64_t auto_inc_value;
+
+ /* Cache the global variable "srv_file_per_table" to a local
+ variable before using it. Note that "srv_file_per_table"
+ is not under dict_sys mutex protection, and could be changed
+ while creating the table. So we read the current value here
+ and make all further decisions based on this. */
+ bool use_tablespace = srv_file_per_table;
+
+ /* Zip Shift Size - log2 - 9 of compressed page size,
+ zero for uncompressed */
+ ulint flags;
+ ulint flags2;
+ dict_table_t* innobase_table = NULL;
+
+ const char* stmt;
+ size_t stmt_len;
+
+ DBUG_ENTER("ha_innobase::create");
+
+ DBUG_ASSERT(thd != NULL);
+ DBUG_ASSERT(create_info != NULL);
+
+ if (form->s->fields > REC_MAX_N_USER_FIELDS) {
+ DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
+ } else if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
+ /* Create the table definition in InnoDB */
+
+ /* Validate create options if innodb_strict_mode is set. */
+ if (create_options_are_invalid(
+ thd, form, create_info, use_tablespace)) {
+ DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ }
+
+ if (!innobase_table_flags(form, create_info,
+ thd, use_tablespace,
+ &flags, &flags2)) {
+ DBUG_RETURN(-1);
+ }
+
+ error = parse_table_name(name, create_info, flags, flags2,
+ norm_name, temp_path, remote_path);
+ if (error) {
+ DBUG_RETURN(error);
+ }
/* Look for a primary key */
primary_key_no = (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1);
+ (int) form->s->primary_key :
+ -1);
/* Our function innobase_get_mysql_key_number_for_index assumes
the primary key is always number 0, if it exists */
@@ -9071,14 +9654,6 @@ ha_innobase::create(
DBUG_RETURN(HA_ERR_GENERIC);
}
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- flags2 |= DICT_TF2_TEMPORARY;
- }
-
- if (use_tablespace) {
- flags2 |= DICT_TF2_USE_TABLESPACE;
- }
-
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -9097,10 +9672,8 @@ ha_innobase::create(
row_mysql_lock_data_dictionary(trx);
- error = create_table_def(trx, form, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
- flags, flags2);
-
+ error = create_table_def(trx, form, norm_name, temp_path,
+ remote_path, flags, flags2);
if (error) {
goto cleanup;
}
@@ -9130,20 +9703,20 @@ ha_innobase::create(
/* Create the ancillary tables that are common to all FTS indexes on
this table. */
- if (fts_indexes > 0) {
- ulint ret = 0;
+ if (flags2 & DICT_TF2_FTS) {
+ enum fts_doc_id_index_enum ret;
- innobase_table = dict_table_open_on_name_no_stats(
- norm_name, TRUE, DICT_ERR_IGNORE_NONE);
+ innobase_table = dict_table_open_on_name(
+ norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
ut_a(innobase_table);
- /* Check whether there alreadys exist FTS_DOC_ID_INDEX */
+ /* Check whether there already exists FTS_DOC_ID_INDEX */
ret = innobase_fts_check_doc_id_index_in_def(
form->s->keys, form->s->key_info);
- /* Raise error if FTS_DOC_ID_INDEX is of wrong format */
- if (ret == FTS_INCORRECT_DOC_ID_INDEX) {
+ switch (ret) {
+ case FTS_INCORRECT_DOC_ID_INDEX:
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_NAME_FOR_INDEX,
@@ -9162,20 +9735,23 @@ ha_innobase::create(
fts_free(innobase_table);
}
- dict_table_close(innobase_table, TRUE);
+ dict_table_close(innobase_table, TRUE, FALSE);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
FTS_DOC_ID_INDEX_NAME);
error = -1;
goto cleanup;
+ case FTS_EXIST_DOC_ID_INDEX:
+ case FTS_NOT_EXIST_DOC_ID_INDEX:
+ break;
}
- error = fts_create_common_tables(
+ dberr_t err = fts_create_common_tables(
trx, innobase_table, norm_name,
(ret == FTS_EXIST_DOC_ID_INDEX));
- error = convert_error_code_to_mysql(error, 0, NULL);
+ error = convert_error_code_to_mysql(err, 0, NULL);
- dict_table_close(innobase_table, TRUE);
+ dict_table_close(innobase_table, TRUE, FALSE);
if (error) {
goto cleanup;
@@ -9196,11 +9772,11 @@ ha_innobase::create(
stmt = innobase_get_stmt(thd, &stmt_len);
if (stmt) {
- error = row_table_add_foreign_constraints(
+ dberr_t err = row_table_add_foreign_constraints(
trx, stmt, stmt_len, norm_name,
create_info->options & HA_LEX_CREATE_TMP_TABLE);
- switch (error) {
+ switch (err) {
case DB_PARENT_NO_INDEX:
push_warning_printf(
@@ -9221,9 +9797,11 @@ ha_innobase::create(
" table where referencing columns appear"
" as the first columns.\n", norm_name);
break;
+ default:
+ break;
}
- error = convert_error_code_to_mysql(error, flags, NULL);
+ error = convert_error_code_to_mysql(err, flags, NULL);
if (error) {
goto cleanup;
@@ -9231,7 +9809,7 @@ ha_innobase::create(
}
/* Cache all the FTS indexes on this table in the FTS specific
structure. They are used for FTS indexed column update handling. */
- if (fts_indexes > 0) {
+ if (flags2 & DICT_TF2_FTS) {
fts_t* fts = innobase_table->fts;
ut_a(fts != NULL);
@@ -9249,10 +9827,15 @@ ha_innobase::create(
log_buffer_flush_to_disk();
- innobase_table = dict_table_open_on_name(norm_name, FALSE);
+ innobase_table = dict_table_open_on_name(
+ norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
DBUG_ASSERT(innobase_table != 0);
+ innobase_copy_frm_flags_from_create_info(innobase_table, create_info);
+
+ dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
+
if (innobase_table) {
/* We update the highest file format in the system table
space, if this table has higher file format setting. */
@@ -9263,9 +9846,9 @@ ha_innobase::create(
}
/* Load server stopword into FTS cache */
- if (fts_indexes > 0) {
+ if (flags2 & DICT_TF2_FTS) {
if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) {
- dict_table_close(innobase_table, FALSE);
+ dict_table_close(innobase_table, FALSE, FALSE);
srv_active_wake_master_thread();
trx_free_for_mysql(trx);
DBUG_RETURN(-1);
@@ -9302,7 +9885,7 @@ ha_innobase::create(
dict_table_autoinc_unlock(innobase_table);
}
- dict_table_close(innobase_table, FALSE);
+ dict_table_close(innobase_table, FALSE, FALSE);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -9314,7 +9897,7 @@ ha_innobase::create(
DBUG_RETURN(0);
cleanup:
- innobase_commit_low(trx);
+ trx_rollback_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
@@ -9332,9 +9915,8 @@ ha_innobase::discard_or_import_tablespace(
/*======================================*/
my_bool discard) /*!< in: TRUE if discard, else import */
{
+ dberr_t err;
dict_table_t* dict_table;
- trx_t* trx;
- int err;
DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
@@ -9342,18 +9924,85 @@ ha_innobase::discard_or_import_tablespace(
ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
dict_table = prebuilt->table;
- trx = prebuilt->trx;
- if (discard) {
- err = row_discard_tablespace_for_mysql(dict_table->name, trx);
+ if (dict_table->space == TRX_SYS_SPACE) {
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE,
+ table->s->table_name.str);
+
+ DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
+ }
+
+ trx_start_if_not_started(prebuilt->trx);
+
+ /* In case MySQL calls this in the middle of a SELECT query, release
+ possible adaptive hash latch to avoid deadlocks of threads. */
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+
+ /* Obtain an exclusive lock on the table. */
+ err = row_mysql_lock_table(
+ prebuilt->trx, dict_table, LOCK_X,
+ discard ? "setting table lock for DISCARD TABLESPACE"
+ : "setting table lock for IMPORT TABLESPACE");
+
+ if (err != DB_SUCCESS) {
+ /* unable to lock the table: do nothing */
+ } else if (discard) {
+
+ /* Discarding an already discarded tablespace should be an
+ idempotent operation. Also, if the .ibd file is missing the
+ user may want to set the DISCARD flag in order to IMPORT
+ a new tablespace. */
+
+ if (dict_table->ibd_file_missing) {
+ ib_senderrf(
+ prebuilt->trx->mysql_thd,
+ IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+ }
+
+ err = row_discard_tablespace_for_mysql(
+ dict_table->name, prebuilt->trx);
+
+ } else if (!dict_table->ibd_file_missing) {
+ /* Commit the transaction in order to
+ release the table lock. */
+ trx_commit_for_mysql(prebuilt->trx);
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_EXISTS, table->s->table_name.str);
+
+ DBUG_RETURN(HA_ERR_TABLE_EXIST);
} else {
- err = row_import_tablespace_for_mysql(dict_table->name, trx);
+ err = row_import_for_mysql(dict_table, prebuilt);
+
+ if (err == DB_SUCCESS) {
+
+ if (table->found_next_number_field) {
+ dict_table_autoinc_lock(dict_table);
+ innobase_initialize_autoinc();
+ dict_table_autoinc_unlock(dict_table);
+ }
+
+ info(HA_STATUS_TIME
+ | HA_STATUS_CONST
+ | HA_STATUS_VARIABLE
+ | HA_STATUS_AUTO);
+ }
}
- err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
+ /* Commit the transaction in order to release the table lock. */
+ trx_commit_for_mysql(prebuilt->trx);
- DBUG_RETURN(err);
+ DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL));
}
/*****************************************************************//**
@@ -9364,10 +10013,15 @@ int
ha_innobase::truncate()
/*===================*/
{
+ dberr_t err;
int error;
DBUG_ENTER("ha_innobase::truncate");
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
/* Get the transaction associated with the current thd, or create one
if not yet created, and update prebuilt->trx */
@@ -9378,11 +10032,28 @@ ha_innobase::truncate()
}
/* Truncate the table in InnoDB */
- error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
+ err = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
- error = convert_error_code_to_mysql(error, prebuilt->table->flags,
- NULL);
+ switch (err) {
+ case DB_TABLESPACE_DELETED:
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ (err == DB_TABLESPACE_DELETED ?
+ ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
+ table->s->table_name.str);
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+
+ default:
+ error = convert_error_code_to_mysql(
+ err, prebuilt->table->flags,
+ prebuilt->trx->mysql_thd);
+ table->status = STATUS_NOT_FOUND;
+ break;
+ }
DBUG_RETURN(error);
}
@@ -9400,12 +10071,11 @@ ha_innobase::delete_table(
const char* name) /*!< in: table name */
{
ulint name_len;
- int error;
+ dberr_t err;
trx_t* parent_trx;
trx_t* trx;
- THD *thd = ha_thd();
- char norm_name[1000];
- char errstr[1024];
+ THD* thd = ha_thd();
+ char norm_name[FN_REFLEN];
DBUG_ENTER("ha_innobase::delete_table");
@@ -9413,29 +10083,21 @@ ha_innobase::delete_table(
"test_normalize_table_name_low",
test_normalize_table_name_low();
);
+ DBUG_EXECUTE_IF(
+ "test_ut_format_name",
+ test_ut_format_name();
+ );
/* Strangely, MySQL passes the table name without the '.frm'
extension, in contrast to ::create */
normalize_table_name(norm_name, name);
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
DBUG_RETURN(HA_ERR_GENERIC);
}
- /* Remove stats for this table and all of its indexes from the
- persistent storage if it exists and if there are stats for this
- table in there. This function creates its own trx and commits
- it. */
- error = dict_stats_delete_table_stats(norm_name,
- errstr, sizeof(errstr));
- if (error != DB_SUCCESS) {
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT, errstr);
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
parent_trx = check_trx_exists(thd);
/* In case MySQL calls this in the middle of a SELECT query, release
@@ -9456,14 +10118,14 @@ ha_innobase::delete_table(
/* We are doing a DDL operation. */
++trx->will_lock;
+ trx->ddl = true;
/* Drop the table in InnoDB */
- error = row_drop_table_for_mysql(norm_name, trx,
- thd_sql_command(thd)
- == SQLCOM_DROP_DB);
+ err = row_drop_table_for_mysql(
+ norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB);
- if (error == DB_TABLE_NOT_FOUND
+ if (err == DB_TABLE_NOT_FOUND
&& innobase_get_lower_case_table_names() == 1) {
char* is_part = NULL;
#ifdef __WIN__
@@ -9473,25 +10135,25 @@ ha_innobase::delete_table(
#endif /* __WIN__ */
if (is_part) {
- char par_case_name[MAX_FULL_NAME_LEN + 1];
+ char par_case_name[FN_REFLEN];
#ifndef __WIN__
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_name, strlen(norm_name));
- par_case_name[strlen(norm_name)] = 0;
+ strcpy(par_case_name, norm_name);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(par_case_name, name, FALSE);
+ normalize_table_name_low(
+ par_case_name, name, FALSE);
#endif
- error = row_drop_table_for_mysql(par_case_name, trx,
- thd_sql_command(thd)
- == SQLCOM_DROP_DB);
+ err = row_drop_table_for_mysql(
+ par_case_name, trx,
+ thd_sql_command(thd) == SQLCOM_DROP_DB);
}
}
@@ -9510,9 +10172,7 @@ ha_innobase::delete_table(
trx_free_for_mysql(trx);
- error = convert_error_code_to_mysql(error, 0, NULL);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
}
/*****************************************************************//**
@@ -9538,6 +10198,10 @@ innobase_drop_database(
DBUG_ASSERT(hton == innodb_hton_ptr);
+ if (srv_read_only_mode) {
+ return;
+ }
+
/* In the Windows plugin, thd = current_thd is always NULL */
if (thd) {
trx_t* parent_trx = check_trx_exists(thd);
@@ -9593,36 +10257,36 @@ innobase_drop_database(
innobase_commit_low(trx);
trx_free_for_mysql(trx);
}
+
/*********************************************************************//**
Renames an InnoDB table.
-@return 0 or error code */
-static
-int
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
innobase_rename_table(
/*==================*/
trx_t* trx, /*!< in: transaction */
const char* from, /*!< in: old name of the table */
- const char* to, /*!< in: new name of the table */
- ibool lock_and_commit)
- /*!< in: TRUE=lock data dictionary and commit */
+ const char* to) /*!< in: new name of the table */
{
- int error;
- char* norm_to;
- char* norm_from;
+ dberr_t error;
+ char norm_to[FN_REFLEN];
+ char norm_from[FN_REFLEN];
- // Magic number 64 arbitrary
- norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0));
- norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0));
+ DBUG_ENTER("innobase_rename_table");
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+
+ ut_ad(!srv_read_only_mode);
normalize_table_name(norm_to, to);
normalize_table_name(norm_from, from);
+ DEBUG_SYNC_C("innodb_rename_table_ready");
+
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
- if (lock_and_commit) {
- row_mysql_lock_data_dictionary(trx);
- }
+ row_mysql_lock_data_dictionary(trx);
/* Transaction must be flagged as a locking transaction or it hasn't
been started yet. */
@@ -9630,7 +10294,7 @@ innobase_rename_table(
ut_a(trx->will_lock > 0);
error = row_rename_table_for_mysql(
- norm_from, norm_to, trx, lock_and_commit);
+ norm_from, norm_to, trx, TRUE);
if (error != DB_SUCCESS) {
if (error == DB_TABLE_NOT_FOUND
@@ -9643,39 +10307,36 @@ innobase_rename_table(
#endif /* __WIN__ */
if (is_part) {
- char par_case_name[MAX_FULL_NAME_LEN + 1];
-
+ char par_case_name[FN_REFLEN];
#ifndef __WIN__
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_from,
- strlen(norm_from));
- par_case_name[strlen(norm_from)] = 0;
+ strcpy(par_case_name, norm_from);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(par_case_name,
- from, FALSE);
+ normalize_table_name_low(
+ par_case_name, from, FALSE);
#endif
error = row_rename_table_for_mysql(
- par_case_name, norm_to, trx,
- lock_and_commit);
-
+ par_case_name, norm_to, trx, TRUE);
}
}
if (error != DB_SUCCESS) {
- FILE* ef = dict_foreign_err_file;
-
- fputs("InnoDB: Renaming table ", ef);
- ut_print_name(ef, trx, TRUE, norm_from);
- fputs(" to ", ef);
- ut_print_name(ef, trx, TRUE, norm_to);
- fputs(" failed!\n", ef);
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ fputs("InnoDB: Renaming table ", ef);
+ ut_print_name(ef, trx, TRUE, norm_from);
+ fputs(" to ", ef);
+ ut_print_name(ef, trx, TRUE, norm_to);
+ fputs(" failed!\n", ef);
+ }
} else {
#ifndef __WIN__
sql_print_warning("Rename partition table %s "
@@ -9696,20 +10357,15 @@ innobase_rename_table(
}
}
- if (lock_and_commit) {
- row_mysql_unlock_data_dictionary(trx);
-
- /* Flush the log to reduce probability that the .frm
- files and the InnoDB data dictionary get out-of-sync
- if the user runs with innodb_flush_log_at_trx_commit = 0 */
+ row_mysql_unlock_data_dictionary(trx);
- log_buffer_flush_to_disk();
- }
+ /* Flush the log to reduce probability that the .frm
+ files and the InnoDB data dictionary get out-of-sync
+ if the user runs with innodb_flush_log_at_trx_commit = 0 */
- my_free(norm_to);
- my_free(norm_from);
+ log_buffer_flush_to_disk();
- return(error);
+ DBUG_RETURN(error);
}
/*********************************************************************//**
@@ -9723,12 +10379,17 @@ ha_innobase::rename_table(
const char* to) /*!< in: new name of the table */
{
trx_t* trx;
- int error;
+ dberr_t error;
trx_t* parent_trx;
THD* thd = ha_thd();
DBUG_ENTER("ha_innobase::rename_table");
+ if (srv_read_only_mode) {
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -9741,15 +10402,11 @@ ha_innobase::rename_table(
trx = innobase_trx_allocate(thd);
- /* Either the transaction is already flagged as a locking transaction
- or it hasn't been started yet. */
-
- ut_a(!trx_is_started(trx) || trx->will_lock > 0);
-
/* We are doing a DDL operation. */
++trx->will_lock;
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- error = innobase_rename_table(trx, from, to, TRUE);
+ error = innobase_rename_table(trx, from, to);
DEBUG_SYNC(thd, "after_innobase_rename_table");
@@ -9761,6 +10418,27 @@ ha_innobase::rename_table(
innobase_commit_low(trx);
trx_free_for_mysql(trx);
+ if (error == DB_SUCCESS) {
+ char norm_from[MAX_FULL_NAME_LEN];
+ char norm_to[MAX_FULL_NAME_LEN];
+ char errstr[512];
+ dberr_t ret;
+
+ normalize_table_name(norm_from, from);
+ normalize_table_name(norm_to, to);
+
+ ret = dict_stats_rename_table(norm_from, norm_to,
+ errstr, sizeof(errstr));
+
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_LOCK_WAIT_TIMEOUT, errstr);
+ }
+ }
+
/* Add a special case to handle the Duplicated Key error
and return DB_ERROR instead.
This is to avoid a possible SIGSEGV error from mysql error
@@ -9773,15 +10451,13 @@ ha_innobase::rename_table(
the dup key error here is due to an existing table whose name
is the one we are trying to rename to) and return the generic
error code. */
- if (error == (int) DB_DUPLICATE_KEY) {
+ if (error == DB_DUPLICATE_KEY) {
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
error = DB_ERROR;
}
- error = convert_error_code_to_mysql(error, 0, NULL);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
/*********************************************************************//**
@@ -9840,7 +10516,7 @@ ha_innobase::records_in_range(
goto func_exit;
}
- key_parts= key->key_parts;
+ key_parts= key->ext_key_parts;
if ((min_key && min_key->keypart_map>=(key_part_map) (1<<key_parts)) ||
(max_key && max_key->keypart_map>=(key_part_map) (1<<key_parts)))
key_parts= key->ext_key_parts;
@@ -9848,11 +10524,11 @@ ha_innobase::records_in_range(
heap = mem_heap_create(2 * (key_parts * sizeof(dfield_t)
+ sizeof(dtuple_t)));
- range_start = dtuple_create(heap, key_parts);
- dict_index_copy_types(range_start, index, key_parts);
+ range_start = dtuple_create(heap, key_parts);
+ dict_index_copy_types(range_start, index, key_parts);
- range_end = dtuple_create(heap, key_parts);
- dict_index_copy_types(range_end, index, key_parts);
+ range_end = dtuple_create(heap, key_parts);
+ dict_index_copy_types(range_end, index, key_parts);
row_sel_convert_mysql_key_to_innobase(
range_start,
@@ -9921,10 +10597,10 @@ ha_rows
ha_innobase::estimate_rows_upper_bound()
/*====================================*/
{
- dict_index_t* index;
- ulonglong estimate;
- ulonglong local_data_file_length;
- ulint stat_n_leaf_pages;
+ const dict_index_t* index;
+ ulonglong estimate;
+ ulonglong local_data_file_length;
+ ulint stat_n_leaf_pages;
DBUG_ENTER("estimate_rows_upper_bound");
@@ -9934,8 +10610,7 @@ ha_innobase::estimate_rows_upper_bound()
update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)
- "calculating upper bound for table rows";
+ prebuilt->trx->op_info = "calculating upper bound for table rows";
/* In case MySQL calls this in the middle of a SELECT query, release
possible adaptive hash latch to avoid deadlocks of threads */
@@ -9951,16 +10626,15 @@ ha_innobase::estimate_rows_upper_bound()
local_data_file_length =
((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;
-
/* Calculate a minimum length for a clustered index record and from
that an upper bound for the number of rows. Since we only calculate
new statistics in row0mysql.cc when a table has grown by a threshold
factor, we must add a safety factor 2 in front of the formula below. */
- estimate = 2 * local_data_file_length /
- dict_index_calc_min_rec_len(index);
+ estimate = 2 * local_data_file_length
+ / dict_index_calc_min_rec_len(index);
- prebuilt->trx->op_info = (char*)"";
+ prebuilt->trx->op_info = "";
DBUG_RETURN((ha_rows) estimate);
}
@@ -9980,7 +10654,32 @@ ha_innobase::scan_time()
as a random disk read, that is, we do not divide the following
by 10, which would be physically realistic. */
- return((double) (prebuilt->table->stat_clustered_index_size));
+ /* The locking below is disabled for performance reasons. Without
+ it we could end up returning uninitialized value to the caller,
+ which in the worst case could make some query plan go bogus or
+ issue a Valgrind warning. */
+#if 0
+ /* avoid potential lock order violation with dict_table_stats_lock()
+ below */
+ update_thd(ha_thd());
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+#endif
+
+ ulint stat_clustered_index_size;
+
+#if 0
+ dict_table_stats_lock(prebuilt->table, RW_S_LATCH);
+#endif
+
+ ut_a(prebuilt->table->stat_initialized);
+
+ stat_clustered_index_size = prebuilt->table->stat_clustered_index_size;
+
+#if 0
+ dict_table_stats_unlock(prebuilt->table, RW_S_LATCH);
+#endif
+
+ return((double) stat_clustered_index_size);
}
/******************************************************************//**
@@ -10016,6 +10715,16 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
+/******************************************************************//**
+Return the size of the InnoDB memory buffer. */
+UNIV_INTERN
+longlong
+ha_innobase::get_memory_buffer_size() const
+/*=======================================*/
+{
+ return(innobase_buffer_pool_size);
+}
+
/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index. We will
first check the "index translation table" for a match of the index to get
@@ -10041,9 +10750,6 @@ innobase_get_mysql_key_number_for_index(
unsigned int i;
ut_a(index);
- /*
- ut_ad(strcmp(index->table->name, ib_table->name) == 0);
- */
/* If index does not belong to the table object of share structure
(ib_table comes from the share structure) search the index->table
@@ -10074,12 +10780,9 @@ innobase_get_mysql_key_number_for_index(
}
}
- /* If index_count in translation table is set to 0, it
- is possible we are in the process of rebuilding table,
- do not spit error in this case */
- if (share->idx_trans_tbl.index_count) {
- /* Print an error message if we cannot find the index
- ** in the "index translation table". */
+ /* Print an error message if we cannot find the index
+ in the "index translation table". */
+ if (*index->name != TEMP_INDEX_PREFIX) {
sql_print_error("Cannot find index %s in InnoDB index "
"translation table.", index->name);
}
@@ -10103,10 +10806,16 @@ innobase_get_mysql_key_number_for_index(
ind != NULL;
ind = dict_table_get_next_index(ind)) {
if (index == ind) {
- sql_print_error("Find index %s in InnoDB index list "
+ /* Temp index is internal to InnoDB, that is
+ not present in the MySQL index list, so no
+ need to print such mismatch warning. */
+ if (*(index->name) != TEMP_INDEX_PREFIX) {
+ sql_print_warning(
+ "Find index %s in InnoDB index list "
"but not its MySQL index number "
"It could be an InnoDB internal index.",
index->name);
+ }
return(-1);
}
}
@@ -10130,45 +10839,49 @@ innodb_rec_per_key(
ha_rows records) /*!< in: estimated total records */
{
ha_rows rec_per_key;
+ ib_uint64_t n_diff;
+
+ ut_a(index->table->stat_initialized);
ut_ad(i < dict_index_get_n_unique(index));
- /* Note the stat_n_diff_key_vals[] stores the diff value with
- n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */
- if (index->stat_n_diff_key_vals[i + 1] == 0) {
+ n_diff = index->stat_n_diff_key_vals[i];
+
+ if (n_diff == 0) {
rec_per_key = records;
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
- ib_uint64_t num_null;
+ ib_uint64_t n_null;
+ ib_uint64_t n_non_null;
+
+ n_non_null = index->stat_n_non_null_key_vals[i];
/* In theory, index->stat_n_non_null_key_vals[i]
should always be less than the number of records.
Since this is statistics value, the value could
have slight discrepancy. But we will make sure
the number of null values is not a negative number. */
- if (records < index->stat_n_non_null_key_vals[i]) {
- num_null = 0;
+ if (records < n_non_null) {
+ n_null = 0;
} else {
- num_null = records - index->stat_n_non_null_key_vals[i];
+ n_null = records - n_non_null;
}
/* If the number of NULL values is the same as or
large than that of the distinct values, we could
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
- if (index->stat_n_diff_key_vals[i + 1] <= num_null) {
+ if (n_diff <= n_null) {
rec_per_key = 1;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
- rec_per_key = (ha_rows)(
- (records - num_null)
- / (index->stat_n_diff_key_vals[i + 1]
- - num_null));
+ rec_per_key = (ha_rows)
+ ((records - n_null) / (n_diff - n_null));
}
} else {
- rec_per_key = (ha_rows)
- (records / index->stat_n_diff_key_vals[i + 1]);
+ DEBUG_SYNC_C("after_checking_for_0");
+ rec_per_key = (ha_rows) (records / n_diff);
}
return(rec_per_key);
@@ -10182,17 +10895,12 @@ UNIV_INTERN
int
ha_innobase::info_low(
/*==================*/
- uint flag, /*!< in: what information MySQL
- requests */
- dict_stats_upd_option_t stats_upd_option)
- /*!< in: whether to (re) calc
- the stats or to fetch them from
- the persistent storage */
+ uint flag, /*!< in: what information is requested */
+ bool is_analyze)
{
dict_table_t* ib_table;
- dict_index_t* index;
ha_rows rec_per_key;
- ib_int64_t n_rows;
+ ib_uint64_t n_rows;
char path[FN_REFLEN];
os_file_stat_t stat_info;
@@ -10216,37 +10924,52 @@ ha_innobase::info_low(
trx_search_latch_release_if_reserved(prebuilt->trx);
ib_table = prebuilt->table;
+ DBUG_ASSERT(ib_table->n_ref_count > 0);
if (flag & HA_STATUS_TIME) {
- if (stats_upd_option != DICT_STATS_FETCH
- || innobase_stats_on_metadata) {
- /* In sql_show we call with this flag: update
- then statistics so that they are up-to-date */
- enum db_err ret;
+ if (is_analyze || innobase_stats_on_metadata) {
+
+ dict_stats_upd_option_t opt;
+ dberr_t ret;
prebuilt->trx->op_info = "updating table statistics";
+ if (dict_stats_is_persistent_enabled(ib_table)) {
+
+ ut_ad(!srv_read_only_mode);
+
+ if (is_analyze) {
+ opt = DICT_STATS_RECALC_PERSISTENT;
+ } else {
+ /* This is e.g. 'SHOW INDEXES', fetch
+ the persistent stats from disk. */
+ opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
+ }
+ } else {
+ opt = DICT_STATS_RECALC_TRANSIENT;
+ }
+
ut_ad(!mutex_own(&dict_sys->mutex));
- ret = dict_stats_update(ib_table, stats_upd_option,
- FALSE);
+ ret = dict_stats_update(ib_table, opt);
if (ret != DB_SUCCESS) {
prebuilt->trx->op_info = "";
DBUG_RETURN(HA_ERR_GENERIC);
}
- prebuilt->trx->op_info = "returning various info to MySQL";
+ prebuilt->trx->op_info =
+ "returning various info to MySQL";
}
my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home, ib_table->name, reg_ext);
+ mysql_data_home, ib_table->name, reg_ext);
unpack_filename(path,path);
/* Note that we do not know the access time of the table,
nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
- if (os_file_get_status(path,&stat_info)) {
+ if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) {
stats.create_time = (ulong) stat_info.ctime;
}
}
@@ -10254,13 +10977,28 @@ ha_innobase::info_low(
if (flag & HA_STATUS_VARIABLE) {
ulint page_size;
+ ulint stat_clustered_index_size;
+ ulint stat_sum_of_other_index_sizes;
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_lock(ib_table, RW_S_LATCH);
+ }
+
+ ut_a(ib_table->stat_initialized);
n_rows = ib_table->stat_n_rows;
- /* Because we do not protect stat_n_rows by any mutex in a
- delete, it is theoretically possible that the value can be
- smaller than zero! TODO: fix this race.
+ stat_clustered_index_size
+ = ib_table->stat_clustered_index_size;
+
+ stat_sum_of_other_index_sizes
+ = ib_table->stat_sum_of_other_index_sizes;
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_unlock(ib_table, RW_S_LATCH);
+ }
+ /*
The MySQL optimizer seems to assume in a left join that n_rows
is an accurate estimate if it is zero. Of course, it is not,
since we do not have any locks on the rows yet at this phase.
@@ -10270,10 +11008,6 @@ ha_innobase::info_low(
set. That way SHOW TABLE STATUS will show the best estimate,
while the optimizer never sees the table empty. */
- if (n_rows < 0) {
- n_rows = 0;
- }
-
if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
n_rows++;
}
@@ -10303,10 +11037,10 @@ ha_innobase::info_low(
stats.records = (ha_rows) n_rows;
stats.deleted = 0;
stats.data_file_length
- = ((ulonglong) ib_table->stat_clustered_index_size)
+ = ((ulonglong) stat_clustered_index_size)
* page_size;
- stats.index_file_length =
- ((ulonglong) ib_table->stat_sum_of_other_index_sizes)
+ stats.index_file_length
+ = ((ulonglong) stat_sum_of_other_index_sizes)
* page_size;
/* Since fsp_get_available_space_in_free_extents() is
@@ -10346,8 +11080,8 @@ ha_innobase::info_low(
"space for table %s but its "
"tablespace has been discarded or "
"the .ibd file is missing. Setting "
- "the free space to zero. "
- "(Errcode: %M)",
+ "the free space to zero. "
+ "(errno: %M)",
ib_table->name, errno);
stats.delete_length = 0;
@@ -10357,7 +11091,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
- stats.mrr_length_per_rec = ref_length + sizeof(void*);
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -10373,12 +11107,40 @@ ha_innobase::info_low(
matches up. If prebuilt->clust_index_was_generated
holds, InnoDB defines GEN_CLUST_INDEX internally */
ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
- - prebuilt->clust_index_was_generated;
+ - prebuilt->clust_index_was_generated;
+ if (table->s->keys < num_innodb_index) {
+ /* If there are too many indexes defined
+ inside InnoDB, ignore those that are being
+ created, because MySQL will only consider
+ the fully built indexes here. */
+
+ for (const dict_index_t* index
+ = UT_LIST_GET_FIRST(ib_table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ /* First, online index creation is
+ completed inside InnoDB, and then
+ MySQL attempts to upgrade the
+ meta-data lock so that it can rebuild
+ the .frm file. If we get here in that
+ time frame, dict_index_is_online_ddl()
+ would not hold and the index would
+ still not be included in TABLE_SHARE. */
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ num_innodb_index--;
+ }
+ }
+
+ if (table->s->keys < num_innodb_index
+ && innobase_fts_check_doc_id_index(
+ ib_table, NULL, NULL)
+ == FTS_EXIST_DOC_ID_INDEX) {
+ num_innodb_index--;
+ }
+ }
- if (table->s->keys != num_innodb_index
- && (innobase_fts_check_doc_id_index(ib_table, NULL)
- == FTS_EXIST_DOC_ID_INDEX
- && table->s->keys != (num_innodb_index - 1))) {
+ if (table->s->keys != num_innodb_index) {
sql_print_error("InnoDB: Table %s contains %lu "
"indexes inside InnoDB, which "
"is different from the number of "
@@ -10387,6 +11149,12 @@ ha_innobase::info_low(
table->s->keys);
}
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_lock(ib_table, RW_S_LATCH);
+ }
+
+ ut_a(ib_table->stat_initialized);
+
for (i = 0; i < table->s->keys; i++) {
ulong j;
rec_per_key = 1;
@@ -10395,7 +11163,7 @@ ha_innobase::info_low(
The identity of index (match up index name with
that of table->key_info[i]) is already verified in
innobase_get_index(). */
- index = innobase_get_index(i);
+ dict_index_t* index = innobase_get_index(i);
if (index == NULL) {
sql_print_error("Table %s contains fewer "
@@ -10410,7 +11178,7 @@ ha_innobase::info_low(
break;
}
- for (j = 0; j < table->key_info[i].key_parts; j++) {
+ for (j = 0; j < table->key_info[i].ext_key_parts; j++) {
if (table->key_info[i].flags & HA_FULLTEXT) {
/* The whole concept has no validity
@@ -10459,13 +11227,15 @@ ha_innobase::info_low(
key_part_map ext_key_part_map=
key_info->ext_key_part_map;
- if (key_info->key_parts != key_info->ext_key_parts) {
+ if (key_info->user_defined_key_parts !=
+ key_info->ext_key_parts)
+ {
KEY *pk_key_info= key_info+
table->s->primary_key;
- uint k = key_info->key_parts;
+ uint k = key_info->user_defined_key_parts;
ha_rows k_rec_per_key = rec_per_key;
- uint pk_parts = pk_key_info->key_parts;
+ uint pk_parts = pk_key_info->user_defined_key_parts;
index= innobase_get_index(
table->s->primary_key);
@@ -10500,6 +11270,10 @@ ha_innobase::info_low(
}
}
}
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_unlock(ib_table, RW_S_LATCH);
+ }
}
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
@@ -10522,7 +11296,7 @@ ha_innobase::info_low(
errkey = (unsigned int) (
(prebuilt->trx->error_key_num
== ULINT_UNDEFINED)
- ? -1
+ ? ~0
: prebuilt->trx->error_key_num);
}
}
@@ -10545,9 +11319,9 @@ UNIV_INTERN
int
ha_innobase::info(
/*==============*/
- uint flag) /*!< in: what information MySQL requests */
+ uint flag) /*!< in: what information is requested */
{
- return(info_low(flag, DICT_STATS_FETCH));
+ return(this->info_low(flag, false /* not ANALYZE */));
}
/**********************************************************************//**
@@ -10561,19 +11335,13 @@ ha_innobase::analyze(
THD* thd, /*!< in: connection thread handle */
HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
- dict_stats_upd_option_t upd_option;
- int ret;
+ int ret;
- if (THDVAR(thd, analyze_is_persistent)) {
- upd_option = DICT_STATS_RECALC_PERSISTENT;
- } else {
- upd_option = DICT_STATS_RECALC_TRANSIENT;
- }
-
- /* Simply call ::info_low() with all the flags
+ /* Simply call this->info_low() with all the flags
and request recalculation of the statistics */
- ret = info_low(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
- upd_option);
+ ret = this->info_low(
+ HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
+ true /* this is ANALYZE */);
if (ret != 0) {
return(HA_ADMIN_FAILED);
@@ -10646,19 +11414,23 @@ ha_innobase::check(
build_template(true);
}
- if (prebuilt->table->ibd_file_missing) {
- sql_print_error("InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ ib_senderrf(
+ thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ DBUG_RETURN(HA_ADMIN_CORRUPT);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
DBUG_RETURN(HA_ADMIN_CORRUPT);
}
@@ -10684,27 +11456,23 @@ ha_innobase::check(
/* Enlarge the fatal lock wait timeout during CHECK TABLE. */
os_increment_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
for (index = dict_table_get_first_index(prebuilt->table);
index != NULL;
index = dict_table_get_next_index(index)) {
char index_name[MAX_FULL_NAME_LEN + 1];
-#if 0
- fputs("Validating index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- putc('\n', stderr);
-#endif
- /* If this is an index being created, break */
+ /* If this is an index being created or dropped, break */
if (*index->name == TEMP_INDEX_PREFIX) {
break;
- } else if (!btr_validate_index(index, prebuilt->trx)) {
+ } else if (!btr_validate_index(index, prebuilt->trx)) {
is_ok = FALSE;
innobase_format_name(
index_name, sizeof index_name,
- prebuilt->index->name, TRUE);
+ index->name, TRUE);
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
@@ -10768,9 +11536,8 @@ ha_innobase::check(
" index %s is corrupted.",
index_name);
is_ok = FALSE;
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index);
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ dict_set_corrupted(
+ index, prebuilt->trx, "CHECK TABLE");
}
if (thd_kill_level(user_thd)) {
@@ -10805,9 +11572,8 @@ ha_innobase::check(
index = dict_table_get_first_index(prebuilt->table);
if (!dict_index_is_corrupted(index)) {
- mutex_enter(&dict_sys->mutex);
- dict_set_corrupted(index);
- mutex_exit(&dict_sys->mutex);
+ dict_set_corrupted(
+ index, prebuilt->trx, "CHECK TABLE");
}
prebuilt->table->corrupted = TRUE;
}
@@ -10828,7 +11594,8 @@ ha_innobase::check(
/* Restore the fatal lock wait timeout after CHECK TABLE. */
os_decrement_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
prebuilt->trx->op_info = "";
if (thd_kill_level(user_thd)) {
@@ -10873,40 +11640,47 @@ ha_innobase::update_table_comment(
/* output the data to a temporary file */
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
+ if (!srv_read_only_mode) {
- fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
- fsp_get_available_space_in_free_extents(
- prebuilt->table->space));
+ mutex_enter(&srv_dict_tmpfile_mutex);
- dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (length + flen + 3 > 64000) {
- flen = 64000 - 3 - length;
- }
+ rewind(srv_dict_tmpfile);
- /* allocate buffer for the full string, and
- read the contents of the temporary file */
+ fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
+ fsp_get_available_space_in_free_extents(
+ prebuilt->table->space));
- str = (char*) my_malloc(length + flen + 3, MYF(0));
+ dict_print_info_on_foreign_keys(
+ FALSE, srv_dict_tmpfile, prebuilt->trx,
+ prebuilt->table);
- if (str) {
- char* pos = str + length;
- if (length) {
- memcpy(str, comment, length);
- *pos++ = ';';
- *pos++ = ' ';
+ flen = ftell(srv_dict_tmpfile);
+
+ if (flen < 0) {
+ flen = 0;
+ } else if (length + flen + 3 > 64000) {
+ flen = 64000 - 3 - length;
}
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
- pos[flen] = 0;
- }
- mutex_exit(&srv_dict_tmpfile_mutex);
+ /* allocate buffer for the full string, and
+ read the contents of the temporary file */
+
+ str = (char*) my_malloc(length + flen + 3, MYF(0));
+
+ if (str) {
+ char* pos = str + length;
+ if (length) {
+ memcpy(str, comment, length);
+ *pos++ = ';';
+ *pos++ = ' ';
+ }
+ rewind(srv_dict_tmpfile);
+ flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
+ pos[flen] = 0;
+ }
+
+ mutex_exit(&srv_dict_tmpfile_mutex);
+ }
prebuilt->trx->op_info = (char*)"";
@@ -10923,8 +11697,8 @@ char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
{
- char* str = 0;
long flen;
+ char* str = 0;
ut_a(prebuilt != NULL);
@@ -10942,31 +11716,36 @@ ha_innobase::get_foreign_key_create_info(void)
trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
+ if (!srv_read_only_mode) {
+ mutex_enter(&srv_dict_tmpfile_mutex);
+ rewind(srv_dict_tmpfile);
- /* output the data to a temporary file */
- dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- prebuilt->trx->op_info = (char*)"";
+ /* Output the data to a temporary file */
+ dict_print_info_on_foreign_keys(
+ TRUE, srv_dict_tmpfile, prebuilt->trx,
+ prebuilt->table);
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- }
+ prebuilt->trx->op_info = (char*)"";
- /* allocate buffer for the string, and
- read the contents of the temporary file */
+ flen = ftell(srv_dict_tmpfile);
- str = (char*) my_malloc(flen + 1, MYF(0));
+ if (flen < 0) {
+ flen = 0;
+ }
- if (str) {
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
- str[flen] = 0;
- }
+ /* Allocate buffer for the string, and
+ read the contents of the temporary file */
+
+ str = (char*) my_malloc(flen + 1, MYF(0));
+
+ if (str) {
+ rewind(srv_dict_tmpfile);
+ flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
+ str[flen] = 0;
+ }
- mutex_exit(&srv_dict_tmpfile_mutex);
+ mutex_exit(&srv_dict_tmpfile_mutex);
+ }
return(str);
}
@@ -11180,17 +11959,16 @@ ha_innobase::can_switch_engines(void)
bool can_switch;
DBUG_ENTER("ha_innobase::can_switch_engines");
-
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ update_thd();
prebuilt->trx->op_info =
"determining if there are foreign key constraints";
- row_mysql_lock_data_dictionary(prebuilt->trx);
+ row_mysql_freeze_data_dictionary(prebuilt->trx);
can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
&& !UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ row_mysql_unfreeze_data_dictionary(prebuilt->trx);
prebuilt->trx->op_info = "";
DBUG_RETURN(can_switch);
@@ -11239,50 +12017,52 @@ ha_innobase::extra(
enum ha_extra_function operation)
/*!< in: HA_EXTRA_FLUSH or some other flag */
{
+ check_trx_exists(ha_thd());
+
/* Warning: since it is not sure that MySQL calls external_lock
before calling this function, the trx field in prebuilt can be
obsolete! */
switch (operation) {
- case HA_EXTRA_FLUSH:
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
- }
- break;
- case HA_EXTRA_RESET_STATE:
- reset_template();
- thd_to_trx(ha_thd())->duplicates = 0;
- break;
- case HA_EXTRA_NO_KEYREAD:
- prebuilt->read_just_key = 0;
- break;
- case HA_EXTRA_KEYREAD:
- prebuilt->read_just_key = 1;
- break;
- case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
- prebuilt->keep_other_fields_on_keyread = 1;
- break;
+ case HA_EXTRA_FLUSH:
+ if (prebuilt->blob_heap) {
+ row_mysql_prebuilt_free_blob_heap(prebuilt);
+ }
+ break;
+ case HA_EXTRA_RESET_STATE:
+ reset_template();
+ thd_to_trx(ha_thd())->duplicates = 0;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ prebuilt->read_just_key = 0;
+ break;
+ case HA_EXTRA_KEYREAD:
+ prebuilt->read_just_key = 1;
+ break;
+ case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ prebuilt->keep_other_fields_on_keyread = 1;
+ break;
- /* IMPORTANT: prebuilt->trx can be obsolete in
- this method, because it is not sure that MySQL
- calls external_lock before this method with the
- parameters below. We must not invoke update_thd()
- either, because the calling threads may change.
- CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
- case HA_EXTRA_INSERT_WITH_UPDATE:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
- break;
- case HA_EXTRA_NO_IGNORE_DUP_KEY:
- thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
- break;
- case HA_EXTRA_WRITE_CAN_REPLACE:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
- break;
- case HA_EXTRA_WRITE_CANNOT_REPLACE:
- thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
- break;
- default:/* Do nothing */
- ;
+ /* IMPORTANT: prebuilt->trx can be obsolete in
+ this method, because it is not sure that MySQL
+ calls external_lock before this method with the
+ parameters below. We must not invoke update_thd()
+ either, because the calling threads may change.
+ CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
+ case HA_EXTRA_INSERT_WITH_UPDATE:
+ thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
+ break;
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
+ break;
+ case HA_EXTRA_WRITE_CAN_REPLACE:
+ thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
+ break;
+ case HA_EXTRA_WRITE_CANNOT_REPLACE:
+ thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
+ break;
+ default:/* Do nothing */
+ ;
}
return(0);
@@ -11391,14 +12171,6 @@ ha_innobase::start_stmt(
++trx->will_lock;
}
- if (prebuilt->result) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS result set not NULL\n");
-
- fts_query_free_result(prebuilt->result);
- prebuilt->result = NULL;
- }
-
return(0);
}
@@ -11471,6 +12243,24 @@ ha_innobase::external_lock(
}
}
+ /* Check for UPDATEs in read-only mode. */
+ if (srv_read_only_mode
+ && (thd_sql_command(thd) == SQLCOM_UPDATE
+ || thd_sql_command(thd) == SQLCOM_INSERT
+ || thd_sql_command(thd) == SQLCOM_REPLACE
+ || thd_sql_command(thd) == SQLCOM_DROP_TABLE
+ || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
+ || thd_sql_command(thd) == SQLCOM_OPTIMIZE
+ || thd_sql_command(thd) == SQLCOM_CREATE_TABLE
+ || thd_sql_command(thd) == SQLCOM_CREATE_INDEX
+ || thd_sql_command(thd) == SQLCOM_DROP_INDEX
+ || thd_sql_command(thd) == SQLCOM_DELETE)) {
+
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
trx = prebuilt->trx;
prebuilt->sql_stat_start = TRUE;
@@ -11478,6 +12268,41 @@ ha_innobase::external_lock(
reset_template();
+ switch (prebuilt->table->quiesce) {
+ case QUIESCE_START:
+ /* Check for FLUSH TABLE t WITH READ LOCK; */
+ if (!srv_read_only_mode
+ && thd_sql_command(thd) == SQLCOM_FLUSH
+ && lock_type == F_RDLCK) {
+
+ row_quiesce_table_start(prebuilt->table, trx);
+
+ /* Use the transaction instance to track UNLOCK
+ TABLES. It can be done via START TRANSACTION; too
+ implicitly. */
+
+ ++trx->flush_tables;
+ }
+ break;
+
+ case QUIESCE_COMPLETE:
+ /* Check for UNLOCK TABLES; implicit or explicit
+ or trx interruption. */
+ if (trx->flush_tables > 0
+ && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
+
+ row_quiesce_table_complete(prebuilt->table, trx);
+
+ ut_a(trx->flush_tables > 0);
+ --trx->flush_tables;
+ }
+
+ break;
+
+ case QUIESCE_NONE:
+ break;
+ }
+
if (lock_type == F_WRLCK) {
/* If this is a SELECT, then it is in UPDATE TABLE ...
@@ -11528,13 +12353,13 @@ ha_innobase::external_lock(
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
&& thd_in_lock_tables(thd)) {
- ulint error = row_lock_table_for_mysql(
+ dberr_t error = row_lock_table_for_mysql(
prebuilt, NULL, 0);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, 0, thd);
- DBUG_RETURN((int) error);
+ DBUG_RETURN(
+ convert_error_code_to_mysql(
+ error, 0, thd));
}
}
@@ -11624,19 +12449,23 @@ ha_innobase::transactional_table_lock(
update_thd(thd);
- if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir?"
- "InnoDB: See " REFMAN
- "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ if (!thd_tablespace_op(thd)) {
+
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+ }
+
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -11654,11 +12483,12 @@ ha_innobase::transactional_table_lock(
prebuilt->select_lock_type = LOCK_S;
prebuilt->stored_select_lock_type = LOCK_S;
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB error:\n"
-"MySQL is trying to set transactional table lock with corrupted lock type\n"
-"to table %s, lock type %d does not exist.\n",
- prebuilt->table->name, lock_type);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "MySQL is trying to set transactional table lock "
+ "with corrupted lock type to table %s, lock type "
+ "%d does not exist.",
+ table->s->table_name.str, lock_type);
+
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -11667,14 +12497,14 @@ ha_innobase::transactional_table_lock(
innobase_register_trx(ht, thd, trx);
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
- ulint error = DB_SUCCESS;
+ dberr_t error;
error = row_lock_table_for_mysql(prebuilt, NULL, 0);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, prebuilt->table->flags, thd);
- DBUG_RETURN((int) error);
+ DBUG_RETURN(
+ convert_error_code_to_mysql(
+ error, prebuilt->table->flags, thd));
}
if (thd_test_options(
@@ -11725,6 +12555,13 @@ innodb_show_status(
DBUG_ENTER("innodb_show_status");
DBUG_ASSERT(hton == innodb_hton_ptr);
+ /* We don't create the temp files or associated
+ mutexes in read-only-mode */
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(0);
+ }
+
trx = check_trx_exists(thd);
trx_search_latch_release_if_reserved(trx);
@@ -11814,11 +12651,11 @@ innodb_mutex_show_status(
{
char buf1[IO_SIZE];
char buf2[IO_SIZE];
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
ulint block_mutex_oswait_count = 0;
ulint block_lock_oswait_count = 0;
- mutex_t* block_mutex = NULL;
+ ib_mutex_t* block_mutex = NULL;
rw_lock_t* block_lock = NULL;
#ifdef UNIV_DEBUG
ulint rw_lock_count= 0;
@@ -11850,41 +12687,7 @@ innodb_mutex_show_status(
block_mutex_oswait_count += mutex->count_os_wait;
continue;
}
-#ifdef UNIV_DEBUG
- if (mutex->mutex_type != 1) {
- if (mutex->count_using > 0) {
- buf1len= my_snprintf(buf1, sizeof(buf1),
- "%s:%s",
- mutex->cmutex_name,
- innobase_basename(mutex->cfile_name));
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu,"
- " spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu,"
- " os_wait_times=%lu",
- mutex->count_using,
- mutex->count_spin_loop,
- mutex->count_spin_rounds,
- mutex->count_os_wait,
- mutex->count_os_yield,
- (ulong) (mutex->lspent_time/1000));
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
- }
- }
- } else {
- rw_lock_count += mutex->count_using;
- rw_lock_count_spin_loop += mutex->count_spin_loop;
- rw_lock_count_spin_rounds += mutex->count_spin_rounds;
- rw_lock_count_os_wait += mutex->count_os_wait;
- rw_lock_count_os_yield += mutex->count_os_yield;
- rw_lock_wait_time += mutex->lspent_time;
- }
-#else /* UNIV_DEBUG */
+
buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
innobase_basename(mutex->cfile_name),
(ulong) mutex->cline);
@@ -11897,7 +12700,6 @@ innodb_mutex_show_status(
mutex_exit(&mutex_list_mutex);
DBUG_RETURN(1);
}
-#endif /* UNIV_DEBUG */
}
if (block_mutex) {
@@ -12170,12 +12972,52 @@ ha_innobase::store_lock(
const bool in_lock_tables = thd_in_lock_tables(thd);
const uint sql_command = thd_sql_command(thd);
- if (sql_command == SQLCOM_DROP_TABLE) {
+ if (srv_read_only_mode
+ && (sql_command == SQLCOM_UPDATE
+ || sql_command == SQLCOM_INSERT
+ || sql_command == SQLCOM_REPLACE
+ || sql_command == SQLCOM_DROP_TABLE
+ || sql_command == SQLCOM_ALTER_TABLE
+ || sql_command == SQLCOM_OPTIMIZE
+ || sql_command == SQLCOM_CREATE_TABLE
+ || sql_command == SQLCOM_CREATE_INDEX
+ || sql_command == SQLCOM_DROP_INDEX
+ || sql_command == SQLCOM_DELETE)) {
+
+ ib_senderrf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+ } else if (sql_command == SQLCOM_FLUSH
+ && lock_type == TL_READ_NO_INSERT) {
+
+ /* Check for FLUSH TABLES ... WITH READ LOCK */
+
+ /* Note: This call can fail, but there is no way to return
+ the error to the caller. We simply ignore it for now here
+ and push the error code to the caller where the error is
+ detected in the function. */
+
+ dberr_t err = row_quiesce_set_state(
+ prebuilt->table, QUIESCE_START, trx);
+
+ ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
+
+ if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
+ prebuilt->select_lock_type = LOCK_S;
+ prebuilt->stored_select_lock_type = LOCK_S;
+ } else {
+ prebuilt->select_lock_type = LOCK_NONE;
+ prebuilt->stored_select_lock_type = LOCK_NONE;
+ }
+
+ /* Check for DROP TABLE */
+ } else if (sql_command == SQLCOM_DROP_TABLE) {
/* MySQL calls this function in DROP TABLE though this table
handle may belong to another thd that is running a query. Let
us in that case skip any changes to the prebuilt struct. */
+ /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
} else if ((lock_type == TL_READ && in_lock_tables)
|| (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
|| lock_type == TL_READ_WITH_SHARED_LOCKS
@@ -12201,18 +13043,18 @@ ha_innobase::store_lock(
unexpected if an obsolete consistent read view would be
used. */
- ulint isolation_level;
-
- isolation_level = trx->isolation_level;
+ /* Use consistent read for checksum table */
- if ((srv_locks_unsafe_for_binlog
- || isolation_level <= TRX_ISO_READ_COMMITTED)
- && isolation_level != TRX_ISO_SERIALIZABLE
- && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
- && (sql_command == SQLCOM_INSERT_SELECT
- || sql_command == SQLCOM_REPLACE_SELECT
- || sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_CREATE_TABLE)) {
+ if (sql_command == SQLCOM_CHECKSUM
+ || ((srv_locks_unsafe_for_binlog
+ || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
+ && trx->isolation_level != TRX_ISO_SERIALIZABLE
+ && (lock_type == TL_READ
+ || lock_type == TL_READ_NO_INSERT)
+ && (sql_command == SQLCOM_INSERT_SELECT
+ || sql_command == SQLCOM_REPLACE_SELECT
+ || sql_command == SQLCOM_UPDATE
+ || sql_command == SQLCOM_CREATE_TABLE))) {
/* If we either have innobase_locks_unsafe_for_binlog
option set or this session is using READ COMMITTED
@@ -12226,11 +13068,6 @@ ha_innobase::store_lock(
prebuilt->select_lock_type = LOCK_NONE;
prebuilt->stored_select_lock_type = LOCK_NONE;
- } else if (sql_command == SQLCOM_CHECKSUM) {
- /* Use consistent read for checksum table */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
} else {
prebuilt->select_lock_type = LOCK_S;
prebuilt->stored_select_lock_type = LOCK_S;
@@ -12330,7 +13167,7 @@ the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
on return and all relevant locks acquired.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_get_autoinc(
/*==============================*/
ulonglong* value) /*!< out: autoinc value */
@@ -12387,12 +13224,7 @@ ha_innobase::innobase_peek_autoinc(void)
}
/*********************************************************************//**
-This function initializes the auto-inc counter if it has not been
-initialized yet. This function does not change the value of the auto-inc
-counter if it already has been initialized. Returns the value of the
-auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
-we have a table-level lock). offset, increment, nb_desired_values are ignored.
-*first_value is set to -1 if error (deadlock or lock wait timeout) */
+Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
UNIV_INTERN
void
ha_innobase::get_auto_increment(
@@ -12407,7 +13239,7 @@ ha_innobase::get_auto_increment(
values */
{
trx_t* trx;
- ulint error;
+ dberr_t error;
ulonglong autoinc = 0;
/* Prepare prebuilt->trx in the table handle */
@@ -12521,18 +13353,15 @@ ha_innobase::reset_auto_increment(
{
DBUG_ENTER("ha_innobase::reset_auto_increment");
- int error;
+ dberr_t error;
update_thd(ha_thd());
error = row_lock_table_autoinc_for_mysql(prebuilt);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(error,
- prebuilt->table->flags,
- user_thd);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(
+ error, prebuilt->table->flags, user_thd));
}
/* The next value can never be 0. */
@@ -12601,7 +13430,7 @@ ha_innobase::get_foreign_dup_key(
/* else */
/* copy table name (and convert from filename-safe encoding to
- system_charset_info, e.g. "foo_@0J@00b6" -> "foo_ö") */
+ system_charset_info) */
char* p;
p = strchr(err_index->table->name, '/');
/* strip ".../" prefix if any */
@@ -12654,7 +13483,7 @@ ha_innobase::cmp_ref(
key_part = table->key_info[table->s->primary_key].key_part;
key_part_end = key_part
- + table->key_info[table->s->primary_key].key_parts;
+ + table->key_info[table->s->primary_key].user_defined_key_parts;
for (; key_part != key_part_end; ++key_part) {
field = key_part->field;
@@ -12699,11 +13528,10 @@ my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
THD* thd, /*!< in: user thread handle */
- char* table_key, /*!< in: concatenation of database name,
- the null character NUL,
- and the table name */
- uint key_length, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
+ char* table_key, /*!< in: normalized path to the
+ table */
+ uint key_length, /*!< in: length of the normalized
+ path to the table */
qc_engine_callback*
call_back, /*!< out: pointer to function for
checking if query caching
@@ -12825,8 +13653,8 @@ innobase_xa_prepare(
false - the current SQL statement
ended */
{
- int error = 0;
- trx_t* trx = check_trx_exists(thd);
+ int error = 0;
+ trx_t* trx = check_trx_exists(thd);
DBUG_ASSERT(hton == innodb_hton_ptr);
@@ -13019,124 +13847,6 @@ innobase_set_cursor_view(
}
/*******************************************************************//**
-If col_name is not NULL, check whether the named column is being
-renamed in the table. If col_name is not provided, check
-whether any one of columns in the table is being renamed.
-@return true if the column is being renamed */
-static
-bool
-check_column_being_renamed(
-/*=======================*/
- const TABLE* table, /*!< in: MySQL table */
- const char* col_name) /*!< in: name of the column */
-{
- uint k;
- Field* field;
-
- for (k = 0; k < table->s->fields; k++) {
- field = table->field[k];
-
- if (field->flags & FIELD_IS_RENAMED) {
-
- /* If col_name is not provided, return
- if the field is marked as being renamed. */
- if (!col_name) {
- return(true);
- }
-
- /* If col_name is provided, return only
- if names match */
- if (innobase_strcasecmp(field->field_name,
- col_name) == 0) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
-Check whether any of the given columns is being renamed in the table.
-@return true if any of col_names is being renamed in table */
-static
-bool
-column_is_being_renamed(
-/*====================*/
- TABLE* table, /*!< in: MySQL table */
- uint n_cols, /*!< in: number of columns */
- const char** col_names) /*!< in: names of the columns */
-{
- uint j;
-
- for (j = 0; j < n_cols; j++) {
- if (check_column_being_renamed(table, col_names[j])) {
- return(true);
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
-Check whether a column in table "table" is being renamed and if this column
-is part of a foreign key, either part of another table, referencing this
-table or part of this table, referencing another table.
-@return true if a column that participates in a foreign key definition
-is being renamed */
-static
-bool
-foreign_key_column_is_being_renamed(
-/*================================*/
- row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
- TABLE* table) /* in: MySQL table */
-{
- dict_foreign_t* foreign;
-
- /* check whether there are foreign keys at all */
- if (UT_LIST_GET_LEN(prebuilt->table->foreign_list) == 0
- && UT_LIST_GET_LEN(prebuilt->table->referenced_list) == 0) {
- /* no foreign keys involved with prebuilt->table */
-
- return(false);
- }
-
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- /* Check whether any column in the foreign key constraints which refer
- to this table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->referenced_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->referenced_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- /* Check whether any column in the foreign key constraints in the
- table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->foreign_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
-
- return(false);
-}
-
-/*******************************************************************//**
*/
UNIV_INTERN
bool
@@ -13145,6 +13855,8 @@ ha_innobase::check_if_incompatible_data(
HA_CREATE_INFO* info,
uint table_changes)
{
+ innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
+
if (table_changes != IS_EQUAL_YES) {
return(COMPATIBLE_DATA_NO);
@@ -13157,25 +13869,8 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_NO);
}
- /* For column rename operation, MySQL does not supply enough
- information (new column name etc.) for InnoDB to make appropriate
- system metadata change. To avoid system metadata inconsistency,
- currently we can just request a table rebuild/copy by returning
- COMPATIBLE_DATA_NO */
- if (check_column_being_renamed(table, NULL)) {
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check if a column participating in a foreign key is being renamed.
- There is no mechanism for updating InnoDB foreign key definitions. */
- if (foreign_key_column_is_being_renamed(prebuilt, table)) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
/* Check that row format didn't change */
if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
- && info->row_type != ROW_TYPE_DEFAULT
&& info->row_type != get_row_type()) {
return(COMPATIBLE_DATA_NO);
@@ -13189,6 +13884,135 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_YES);
}
+/****************************************************************//**
+Update the system variable innodb_io_capacity_max using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_io_capacity_max_update(
+/*===========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val < srv_io_capacity) {
+ in_val = srv_io_capacity;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_io_capacity_max cannot be"
+ " set lower than innodb_io_capacity.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_io_capacity_max to %lu",
+ srv_io_capacity);
+ }
+
+ srv_max_io_capacity = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_io_capacity using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_io_capacity_update(
+/*======================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val > srv_max_io_capacity) {
+ in_val = srv_max_io_capacity;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_io_capacity cannot be set"
+ " higher than innodb_io_capacity_max.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_io_capacity to %lu",
+ srv_max_io_capacity);
+ }
+
+ srv_io_capacity = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_max_dirty_pages_pct using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_max_dirty_pages_pct_update(
+/*==============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val < srv_max_dirty_pages_pct_lwm) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_max_dirty_pages_pct cannot be"
+ " set lower than"
+ " innodb_max_dirty_pages_pct_lwm.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+					    "Lowering"
+					    " innodb_max_dirty_pages_pct_lwm to %lu",
+					    in_val);
+
+ srv_max_dirty_pages_pct_lwm = in_val;
+ }
+
+ srv_max_buf_pool_modified_pct = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_max_dirty_pages_pct_lwm using the
+"saved" value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_max_dirty_pages_pct_lwm_update(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val > srv_max_buf_pool_modified_pct) {
+ in_val = srv_max_buf_pool_modified_pct;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_max_dirty_pages_pct_lwm"
+ " cannot be set higher than"
+ " innodb_max_dirty_pages_pct.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+					    "Setting innodb_max_dirty_pages_pct_lwm"
+					    " to %lu",
+					    in_val);
+ }
+
+ srv_max_dirty_pages_pct_lwm = in_val;
+}
+
/************************************************************//**
Validate the file format name and return its corresponding id.
@return valid file format id */
@@ -13554,8 +14378,8 @@ innodb_internal_table_validate(
return(0);
}
- user_table = dict_table_open_on_name_no_stats(
- table_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE);
if (user_table) {
if (dict_table_has_fts_index(user_table)) {
@@ -13563,7 +14387,7 @@ innodb_internal_table_validate(
ret = 0;
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, TRUE);
}
return(ret);
@@ -13608,13 +14432,12 @@ innodb_internal_table_update(
}
/****************************************************************//**
-Update the session variable innodb_session_stopword_table
-with the "saved" stopword table name value. This function
-is registered as a callback with MySQL. */
+Update the system variable innodb_adaptive_hash_index using the "saved"
+value. This function is registered as a callback with MySQL. */
static
void
-innodb_session_stopword_update(
-/*===========================*/
+innodb_adaptive_hash_index_update(
+/*==============================*/
THD* thd, /*!< in: thread handle */
struct st_mysql_sys_var* var, /*!< in: pointer to
system variable */
@@ -13623,32 +14446,20 @@ innodb_session_stopword_update(
const void* save) /*!< in: immediate result
from check function */
{
- const char* stopword_table_name;
- char* old;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
- stopword_table_name = *static_cast<const char*const*>(save);
- old = *(char**) var_ptr;
-
- if (stopword_table_name) {
- *(char**) var_ptr = my_strdup(stopword_table_name, MYF(0));
+ if (*(my_bool*) save) {
+ btr_search_enable();
} else {
- *(char**) var_ptr = NULL;
- }
-
- if (old) {
- my_free(old);
+ btr_search_disable();
}
}
+
/****************************************************************//**
-Update the system variable innodb_adaptive_hash_index using the "saved"
+Update the system variable innodb_cmp_per_index using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_adaptive_hash_index_update(
-/*==============================*/
+innodb_cmp_per_index_update(
+/*========================*/
THD* thd, /*!< in: thread handle */
struct st_mysql_sys_var* var, /*!< in: pointer to
system variable */
@@ -13657,11 +14468,13 @@ innodb_adaptive_hash_index_update(
const void* save) /*!< in: immediate result
from check function */
{
- if (*(my_bool*) save) {
- btr_search_enable();
- } else {
- btr_search_disable();
+ /* Reset the stats whenever we enable the table
+ INFORMATION_SCHEMA.innodb_cmp_per_index. */
+ if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
+ page_zip_reset_stat_per_index();
}
+
+ srv_cmp_per_index_enabled = !!(*(my_bool*) save);
}
/****************************************************************//**
@@ -14233,6 +15046,53 @@ exit:
return;
}
+#ifdef __WIN__
+/*************************************************************//**
+Validate if passed-in "value" is a valid value for
+innodb_buffer_pool_filename. On Windows, file names with colon (:)
+are not allowed.
+
+@return 0 for valid name */
+static
+int
+innodb_srv_buf_dump_filename_validate(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ const char* buf_name;
+ char buff[OS_FILE_MAX_PATH];
+ int len= sizeof(buff);
+
+ ut_a(save != NULL);
+ ut_a(value != NULL);
+
+ buf_name = value->val_str(value, buff, &len);
+
+ if (buf_name) {
+ if (is_filename_allowed(buf_name, len, FALSE)){
+ *static_cast<const char**>(save) = buf_name;
+ return(0);
+ } else {
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "InnoDB: innodb_buffer_pool_filename "
+ "cannot have colon (:) in the file name.");
+
+ }
+ }
+
+ return(1);
+}
+#else /* __WIN__ */
+# define innodb_srv_buf_dump_filename_validate NULL
+#endif /* __WIN__ */
+
/****************************************************************//**
Update the system variable innodb_monitor_enable and enable
specified monitor counter.
@@ -14310,6 +15170,29 @@ innodb_reset_all_monitor_update(
}
/****************************************************************//**
+Update the system variable innodb_compression_level using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_compression_level_update(
+/*============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ /* We have this call back just to avoid confusion between
+ ulong and ulint datatypes. */
+ innobase_compression_level =
+ (*static_cast<const ulong*>(save));
+ page_compression_level =
+ (static_cast<const ulint>(innobase_compression_level));
+}
+
+/****************************************************************//**
Parse and enable InnoDB monitor counters during server startup.
User can list the monitor counters/groups to be enable by specifying
"loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
@@ -14427,6 +15310,12 @@ innobase_fts_retrieve_ranking(
ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
+ if (ft_prebuilt->read_just_key) {
+ fts_ranking_t* ranking =
+ rbt_value(fts_ranking_t, result->current);
+ return(ranking->rank);
+ }
+
/* Retrieve the ranking value for doc_id with value of
prebuilt->fts_doc_id */
return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
@@ -14441,20 +15330,16 @@ innobase_fts_close_ranking(
FT_INFO * fts_hdl)
{
fts_result_t* result;
- row_prebuilt_t* ft_prebuilt;
- ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
+ ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt->in_fts_query = false;
result = ((NEW_FT_INFO*) fts_hdl)->ft_result;
fts_query_free_result(result);
- if (result == ft_prebuilt->result) {
- ft_prebuilt->result = NULL;
- }
-
my_free((uchar*) fts_hdl);
+
return;
}
@@ -14478,7 +15363,120 @@ innobase_fts_find_ranking(
/* Retrieve the ranking value for doc_id with value of
prebuilt->fts_doc_id */
- return fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id);
+ return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
+}
+
+#ifdef UNIV_DEBUG
+static my_bool innodb_purge_run_now = TRUE;
+static my_bool innodb_purge_stop_now = TRUE;
+
+/****************************************************************//**
+Set the purge state to RUN. If purge is disabled then it
+is a no-op. This function is registered as a callback with MySQL. */
+static
+void
+purge_run_now_set(
+/*==============*/
+ THD* thd /*!< in: thread handle */
+ __attribute__((unused)),
+ struct st_mysql_sys_var* var /*!< in: pointer to system
+ variable */
+ __attribute__((unused)),
+ void* var_ptr /*!< out: where the formal
+ string goes */
+ __attribute__((unused)),
+ const void* save) /*!< in: immediate result from
+ check function */
+{
+ if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
+ trx_purge_run();
+ }
+}
+
+/****************************************************************//**
+Set the purge state to STOP. If purge is disabled then it
+is a no-op. This function is registered as a callback with MySQL. */
+static
+void
+purge_stop_now_set(
+/*===============*/
+ THD* thd /*!< in: thread handle */
+ __attribute__((unused)),
+ struct st_mysql_sys_var* var /*!< in: pointer to system
+ variable */
+ __attribute__((unused)),
+ void* var_ptr /*!< out: where the formal
+ string goes */
+ __attribute__((unused)),
+ const void* save) /*!< in: immediate result from
+ check function */
+{
+ if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
+ trx_purge_stop();
+ }
+}
+#endif /* UNIV_DEBUG */
+
+/***********************************************************************
+@return version of the extended FTS API */
+uint
+innobase_fts_get_version()
+/*======================*/
+{
+ /* Currently this doesn't make much sense as returning
+ HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
+ This supposed to ease future extensions. */
+ return(2);
+}
+
+/***********************************************************************
+@return Which part of the extended FTS API is supported */
+ulonglong
+innobase_fts_flags()
+/*================*/
+{
+ return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
+}
+
+
+/***********************************************************************
+Find and Retrieve the FTS doc_id for the current result row
+@return the document ID */
+ulonglong
+innobase_fts_retrieve_docid(
+/*========================*/
+ FT_INFO_EXT * fts_hdl) /*!< in: FTS handler */
+{
+ row_prebuilt_t* ft_prebuilt;
+ fts_result_t* result;
+
+ ft_prebuilt = ((NEW_FT_INFO *)fts_hdl)->ft_prebuilt;
+ result = ((NEW_FT_INFO *)fts_hdl)->ft_result;
+
+ if (ft_prebuilt->read_just_key) {
+ fts_ranking_t* ranking =
+ rbt_value(fts_ranking_t, result->current);
+ return(ranking->doc_id);
+ }
+
+ return(ft_prebuilt->fts_doc_id);
+}
+
+/***********************************************************************
+Find and retrieve the size of the current result
+@return number of matching rows */
+ulonglong
+innobase_fts_count_matches(
+/*=======================*/
+ FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
+{
+ NEW_FT_INFO* handle = (NEW_FT_INFO *) fts_hdl;
+
+ if (handle->ft_result->rankings_by_id != 0) {
+ return rbt_size(handle->ft_result->rankings_by_id);
+ } else {
+ return(0);
+ }
}
/* These variables are never read by InnoDB or changed. They are a kind of
@@ -14515,7 +15513,7 @@ buffer_pool_dump_now(
const void* save) /*!< in: immediate result from
check function */
{
- if (*(my_bool*) save) {
+ if (*(my_bool*) save && !srv_read_only_mode) {
buf_dump_start();
}
}
@@ -14622,7 +15620,26 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
PLUGIN_VAR_RQCMDARG,
"Number of IOPs the server can do. Tunes the background IO rate",
- NULL, NULL, 200, 100, ~0UL, 0);
+ NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);
+
+static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
+ PLUGIN_VAR_RQCMDARG,
+ "Limit to which innodb_io_capacity can be inflated.",
+ NULL, innodb_io_capacity_max_update,
+ SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
+ SRV_MAX_IO_CAPACITY_LIMIT, 0);
+
+#ifdef UNIV_DEBUG
+static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Set purge state to RUN",
+ NULL, purge_run_now_set, FALSE);
+
+static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Set purge state to STOP",
+ NULL, purge_stop_now_set, FALSE);
+#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
@@ -14634,7 +15651,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be from 0 to 32. Default is 0.",
+ "Purge threads can be from 1 to 32. Default is 1.",
NULL, NULL,
1, /* Default setting */
1, /* Minimum value */
@@ -14657,7 +15674,7 @@ static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
PLUGIN_VAR_NOCMDARG,
"Stores each InnoDB table to an .ibd file in the database dir.",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
PLUGIN_VAR_RQCMDARG,
@@ -14693,6 +15710,11 @@ static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table
innodb_stopword_table_update,
NULL);
+static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
+ PLUGIN_VAR_OPCMDARG,
+ "Write and flush logs every (n) second.",
+ NULL, NULL, 1, 0, 2700, 0);
+
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
PLUGIN_VAR_OPCMDARG,
"Controls the durability/speed trade-off for commits."
@@ -14738,20 +15760,38 @@ static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
"Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
#endif /* UNIV_LOG_ARCHIVE */
-static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
+static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to InnoDB log files.", NULL, NULL, NULL);
static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages allowed in bufferpool.",
- NULL, NULL, 75, 0, 99, 0);
+ NULL, innodb_max_dirty_pages_pct_update, 75, 0, 99, 0);
+
+static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct_lwm,
+ srv_max_dirty_pages_pct_lwm,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of dirty pages at which flushing kicks in.",
+ NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99, 0);
+
+static MYSQL_SYSVAR_ULONG(adaptive_flushing_lwm,
+ srv_adaptive_flushing_lwm,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of log capacity below which no adaptive flushing happens.",
+ NULL, NULL, 10, 0, 70, 0);
static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
PLUGIN_VAR_NOCMDARG,
"Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
+ srv_flushing_avg_loops,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of iterations over which the background flushing is averaged.",
+ NULL, NULL, 30, 1, 1000, 0);
+
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
PLUGIN_VAR_RQCMDARG,
"Desired maximum length of the purge queue (0 = no limit)",
@@ -14760,11 +15800,11 @@ static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
PLUGIN_VAR_RQCMDARG,
"Maximum delay of user threads in micro-seconds",
- NULL, NULL,
+ NULL, NULL,
0L, /* Default seting */
0L, /* Minimum value */
10000000UL, 0); /* Maximum value */
-
+
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
@@ -14777,8 +15817,9 @@ static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
- NULL, NULL, TRUE);
+ "Enable statistics gathering for metadata commands such as "
+ "SHOW TABLE STATUS for tables that use transient statistics (off by default)",
+ NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
PLUGIN_VAR_RQCMDARG,
@@ -14792,6 +15833,20 @@ static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
"statistics (if persistent statistics are not used, default 8)",
NULL, NULL, 8, 1, ~0ULL, 0);
+static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB persistent statistics enabled for all tables unless overridden "
+ "at table level",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB automatic recalculation of persistent statistics enabled for all "
+ "tables unless overridden at table level (automatic recalculation is only "
+ "done when InnoDB decides that the table has changed too much and needs a "
+ "new statistics)",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
srv_stats_persistent_sample_pages,
PLUGIN_VAR_RQCMDARG,
@@ -14811,6 +15866,13 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
"innodb_thread_concurrency is reached (0 by default)",
NULL, NULL, 0, 0, ~0UL, 0);
+static MYSQL_SYSVAR_ULONG(compression_level, innobase_compression_level,
+ PLUGIN_VAR_RQCMDARG,
+ "Compression level used for compressed row format. 0 is no compression"
+ ", 1 is fastest, 9 is best compression and default is 6.",
+ NULL, innodb_compression_level_update,
+ DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DEPRECATED. This option may be removed in future releases, "
@@ -14822,7 +15884,7 @@ static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_
static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
PLUGIN_VAR_RQCMDARG,
"Data file autoextend increment in megabytes",
- NULL, NULL, 8L, 1L, 1000L, 0);
+ NULL, NULL, 64L, 1L, 1000L, 0);
static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -14844,12 +15906,12 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
- NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
+ NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L);
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
"Filename to/from which to dump/load the InnoDB buffer pool",
- NULL, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
+ innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
PLUGIN_VAR_RQCMDARG,
@@ -14882,10 +15944,13 @@ static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
"How deep to scan LRU to keep it clean",
NULL, NULL, 1024, 100, ~0UL, 0);
-static MYSQL_SYSVAR_BOOL(flush_neighbors, srv_flush_neighbors,
- PLUGIN_VAR_NOCMDARG,
- "Flush neighbors from buffer pool when flushing a block.",
- NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
+ PLUGIN_VAR_OPCMDARG,
+ "Set to 0 (don't flush neighbors from buffer pool),"
+ " 1 (flush contiguous neighbors from buffer pool)"
+ " or 2 (flush neighbors from buffer pool),"
+ " when flushing a block",
+ NULL, NULL, 1, 0, 2, 0);
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
PLUGIN_VAR_RQCMDARG,
@@ -14895,7 +15960,7 @@ static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
PLUGIN_VAR_RQCMDARG,
"Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
- NULL, NULL, 500L, 1L, ~0UL, 0);
+ NULL, NULL, 5000L, 1L, ~0UL, 0);
static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
@@ -14905,7 +15970,7 @@ static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
PLUGIN_VAR_OPCMDARG,
"Whether to enable additional FTS diagnostic printout ",
- NULL, NULL, TRUE);
+ NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
PLUGIN_VAR_OPCMDARG,
@@ -14921,7 +15986,7 @@ static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"InnoDB Fulltext search cache size in bytes",
- NULL, NULL, 32000000, 1600000, 80000000, 0);
+ NULL, NULL, 8000000, 1600000, 80000000, 0);
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -14947,7 +16012,12 @@ static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Memory buffer size for index creation",
- NULL, NULL, 1048576, 524288, 64<<20, 0);
+ NULL, NULL, 1048576, 65536, 64<<20, 0);
+
+static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum modification log file size for online index creation",
+ NULL, NULL, 128<<20, 65536, ~0ULL, 0);
static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
PLUGIN_VAR_NOCMDARG,
@@ -14964,11 +16034,18 @@ static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
"Number of background write I/O threads in InnoDB.",
NULL, NULL, 4, 1, 64, 0);
-static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
+static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Helps to save your data in case the disk image of the database becomes corrupt.",
NULL, NULL, 0, 0, 6, 0);
+#ifndef DBUG_OFF
+static MYSQL_SYSVAR_ULONG(force_recovery_crash, srv_force_recovery_crash,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Kills the server during crash recovery.",
+ NULL, NULL, 0, 0, 10, 0);
+#endif /* !DBUG_OFF */
+
static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Page size to use for all InnoDB tablespaces.",
@@ -14983,12 +16060,12 @@ static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Size of each log file in a log group.",
- NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
+ NULL, NULL, 48*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
-static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
+static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
- NULL, NULL, 2, 2, 100, 0);
+ "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
+ NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0);
static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -15004,13 +16081,13 @@ static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
PLUGIN_VAR_RQCMDARG,
"Move blocks to the 'new' end of the buffer pool if the first access"
" was at least this many milliseconds ago."
- " The timeout is disabled if 0 (the default).",
- NULL, NULL, 0, 0, UINT_MAX32, 0);
+ " The timeout is disabled if 0.",
+ NULL, NULL, 1000, 0, UINT_MAX32, 0);
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
- NULL, NULL, 300L, 10L, LONG_MAX, 0);
+ NULL, NULL, 0L, 0L, LONG_MAX, 0);
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
PLUGIN_VAR_RQCMDARG,
@@ -15110,6 +16187,37 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable binlog for applications direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(api_enable_mdl, ib_mdl_enabled,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable MDL for applications direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(api_disable_rowlock, ib_disable_row_lock,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Disable row lock when direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_ULONG(api_trx_level, ib_trx_level_setting,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB API transaction isolation level",
+ NULL, NULL,
+ 0, /* Default setting */
+ 0, /* Minimum value */
+ 3, 0); /* Maximum value */
+
+static MYSQL_SYSVAR_ULONG(api_bk_commit_interval, ib_bk_commit_interval,
+ PLUGIN_VAR_OPCMDARG,
+ "Background commit interval in seconds",
+ NULL, NULL,
+ 5, /* Default setting */
+ 1, /* Minimum value */
+ 1024 * 1024 * 1024, 0); /* Maximum value */
+
static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
PLUGIN_VAR_RQCMDARG,
"Buffer changes to reduce random access: "
@@ -15137,6 +16245,12 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
NULL, NULL, 0, 0, 2, 0);
+
+static MYSQL_SYSVAR_BOOL(disable_background_merge,
+ srv_ibuf_disable_background_merge,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
+ "Disable change buffering merges by the master thread",
+ NULL, NULL, FALSE);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
@@ -15179,15 +16293,53 @@ static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
"Print all deadlocks to MySQL error log (off by default)",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
+ zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
+ "If the compression failure rate of a table is greater than this number"
+ " more padding is added to the pages to reduce the failures. A value of"
+ " zero implies no padding",
+ NULL, NULL, 5, 0, 100, 0);
+
+static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
+ zip_pad_max, PLUGIN_VAR_OPCMDARG,
+ "Percentage of empty space on a data page that can be reserved"
+ " to make the page compressible.",
+ NULL, NULL, 50, 0, 75, 0);
+
+static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Start InnoDB in read only mode (off by default)",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, "
+ "may have negative impact on performance (off by default)",
+ NULL, innodb_cmp_per_index_update, FALSE);
+
#ifdef UNIV_DEBUG_never
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0);
+
+static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
+ btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
+ "Artificially limit the number of records per B-tree page (0=unlimited).",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
+ srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDARG,
+ "Pause actual purging any delete-marked records, but merely update the purge view. "
+ "It is to create artificially the situation the purge view have been updated "
+ "but the each purges were not done yet.",
+ NULL, NULL, FALSE);
#endif /* UNIV_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(api_trx_level),
+ MYSQL_SYSVAR(api_bk_commit_interval),
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
MYSQL_SYSVAR(buffer_pool_instances),
@@ -15203,9 +16355,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(checksums),
MYSQL_SYSVAR(commit_concurrency),
MYSQL_SYSVAR(concurrency_tickets),
+ MYSQL_SYSVAR(compression_level),
MYSQL_SYSVAR(data_file_path),
MYSQL_SYSVAR(data_home_dir),
MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(api_enable_binlog),
+ MYSQL_SYSVAR(api_enable_mdl),
+ MYSQL_SYSVAR(api_disable_rowlock),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(file_io_threads),
MYSQL_SYSVAR(read_io_threads),
@@ -15214,9 +16370,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(file_format),
MYSQL_SYSVAR(file_format_check),
MYSQL_SYSVAR(file_format_max),
+ MYSQL_SYSVAR(flush_log_at_timeout),
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
+#ifndef DBUG_OFF
+ MYSQL_SYSVAR(force_recovery_crash),
+#endif /* !DBUG_OFF */
MYSQL_SYSVAR(ft_cache_size),
MYSQL_SYSVAR(ft_enable_stopword),
MYSQL_SYSVAR(ft_max_token_size),
@@ -15237,7 +16397,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_files_in_group),
MYSQL_SYSVAR(log_group_home_dir),
MYSQL_SYSVAR(max_dirty_pages_pct),
+ MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
+ MYSQL_SYSVAR(adaptive_flushing_lwm),
MYSQL_SYSVAR(adaptive_flushing),
+ MYSQL_SYSVAR(flushing_avg_loops),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(max_purge_lag_delay),
MYSQL_SYSVAR(mirrored_log_groups),
@@ -15254,7 +16417,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_on_metadata),
MYSQL_SYSVAR(stats_sample_pages),
MYSQL_SYSVAR(stats_transient_sample_pages),
+ MYSQL_SYSVAR(stats_persistent),
MYSQL_SYSVAR(stats_persistent_sample_pages),
+ MYSQL_SYSVAR(stats_auto_recalc),
MYSQL_SYSVAR(adaptive_hash_index),
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(replication_delay),
@@ -15262,7 +16427,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(strict_mode),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sort_buffer_size),
- MYSQL_SYSVAR(analyze_is_persistent),
+ MYSQL_SYSVAR(online_alter_log_max_size),
MYSQL_SYSVAR(sync_spin_loops),
MYSQL_SYSVAR(spin_wait_delay),
MYSQL_SYSVAR(table_locks),
@@ -15279,33 +16444,45 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
+ MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
+ MYSQL_SYSVAR(read_only),
MYSQL_SYSVAR(io_capacity),
+ MYSQL_SYSVAR(io_capacity_max),
MYSQL_SYSVAR(monitor_enable),
MYSQL_SYSVAR(monitor_disable),
MYSQL_SYSVAR(monitor_reset),
MYSQL_SYSVAR(monitor_reset_all),
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
+#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(purge_run_now),
+ MYSQL_SYSVAR(purge_stop_now),
+#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
MYSQL_SYSVAR(page_hash_locks),
MYSQL_SYSVAR(doublewrite_batch_size),
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
MYSQL_SYSVAR(print_all_deadlocks),
+ MYSQL_SYSVAR(cmp_per_index_enabled),
MYSQL_SYSVAR(undo_logs),
MYSQL_SYSVAR(rollback_segments),
MYSQL_SYSVAR(undo_directory),
MYSQL_SYSVAR(undo_tablespaces),
MYSQL_SYSVAR(sync_array_size),
+ MYSQL_SYSVAR(compression_failure_threshold_pct),
+ MYSQL_SYSVAR(compression_pad_pct_max),
#ifdef UNIV_DEBUG_never /* disable this flag. --innodb-trx becomes ambiguous */
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
+ MYSQL_SYSVAR(limit_optimistic_insert_debug),
+ MYSQL_SYSVAR(trx_purge_view_update_only_debug),
#endif /* UNIV_DEBUG */
NULL
};
-maria_declare_plugin(innobase)
+mysql_declare_plugin(innobase)
{
MYSQL_STORAGE_ENGINE_PLUGIN,
&innobase_storage_engine,
@@ -15318,8 +16495,8 @@ maria_declare_plugin(innobase)
INNODB_VERSION_SHORT,
innodb_status_variables_export,/* status variables */
innobase_system_variables, /* system variables */
- INNODB_VERSION_STR, /* string version */
- MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
+ NULL, /* reserved */
+ 0, /* flags */
},
i_s_innodb_trx,
i_s_innodb_locks,
@@ -15328,6 +16505,8 @@ i_s_innodb_cmp,
i_s_innodb_cmp_reset,
i_s_innodb_cmpmem,
i_s_innodb_cmpmem_reset,
+i_s_innodb_cmp_per_index,
+i_s_innodb_cmp_per_index_reset,
i_s_innodb_buffer_page,
i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
@@ -15345,9 +16524,11 @@ i_s_innodb_sys_indexes,
i_s_innodb_sys_columns,
i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
-i_s_innodb_sys_foreign_cols
+i_s_innodb_sys_foreign_cols,
+i_s_innodb_sys_tablespaces,
+i_s_innodb_sys_datafiles
-maria_declare_plugin_end;
+mysql_declare_plugin_end;
/** @brief Initialize the default value of innodb_commit_concurrency.
@@ -15384,7 +16565,7 @@ innobase_undo_logs_init_default_max()
#ifdef UNIV_COMPILE_TEST_FUNCS
-typedef struct innobase_convert_name_test_struct {
+struct innobase_convert_name_test_t {
char* buf;
ulint buflen;
const char* id;
@@ -15393,7 +16574,7 @@ typedef struct innobase_convert_name_test_struct {
ibool file_id;
const char* expected;
-} innobase_convert_name_test_t;
+};
void
test_innobase_convert_name()
@@ -15512,62 +16693,52 @@ test_innobase_convert_name()
* Multi Range Read interface, DS-MRR calls
*/
-int
-ha_innobase::multi_range_read_init(
- RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges,
- uint mode,
- HANDLER_BUFFER* buf)
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
- return(ds_mrr.dsmrr_init(this, seq, seq_init_param,
- n_ranges, mode, buf));
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int
-ha_innobase::multi_range_read_next(
- range_id_t *range_info)
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
{
- return(ds_mrr.dsmrr_next(range_info));
+ return ds_mrr.dsmrr_next(range_info);
}
-ha_rows
-ha_innobase::multi_range_read_info_const(
- uint keyno,
- RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges,
- uint* bufsz,
- uint* flags,
- Cost_estimate* cost)
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags,
+ Cost_estimate *cost)
{
- /* See comments in ha_myisam::multi_range_read_info_const */
- ds_mrr.init(this, table);
- return(ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param,
- n_ranges, bufsz, flags, cost));
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ ds_mrr.init(this, table);
+
+ if (prebuilt->select_lock_type != LOCK_NONE)
+ *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+ ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
+ bufsz, flags, cost);
+ return res;
}
-ha_rows
-ha_innobase::multi_range_read_info(
- uint keyno,
- uint n_ranges,
- uint keys,
- uint key_parts,
- uint* bufsz,
- uint* flags,
- Cost_estimate* cost)
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
+ uint keys, uint key_parts,
+ uint *bufsz, uint *flags,
+ Cost_estimate *cost)
{
- ds_mrr.init(this, table);
- return(ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
- flags, cost));
+ ds_mrr.init(this, table);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
+ return res;
}
-int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
{
return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
}
-
/**
* Index Condition Pushdown interface implementation
*/
@@ -15581,7 +16752,7 @@ innobase_index_cond(
/*================*/
void* file) /*!< in/out: pointer to ha_innobase */
{
- return handler_index_cond_check(file);
+ return handler_index_cond_check(file);
}
/** Attempt to push down an index condition.
@@ -15606,3 +16777,181 @@ ha_innobase::idx_cond_push(
DBUG_RETURN(NULL);
}
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+ const char* format = innobase_get_err_msg(code);
+
+ /* If the caller wants to push a message to the client then
+ the caller must pass a valid session handle. */
+
+ ut_a(thd != 0);
+
+ /* The error code must exist in the errmsg-utf8.txt file. */
+ ut_a(format != 0);
+
+ va_start(args, code);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ Sql_condition::enum_warning_level l;
+
+ l = Sql_condition::WARN_LEVEL_NOTE;
+
+ switch(level) {
+ case IB_LOG_LEVEL_INFO:
+ break;
+ case IB_LOG_LEVEL_WARN:
+ l = Sql_condition::WARN_LEVEL_WARN;
+ break;
+ case IB_LOG_LEVEL_ERROR:
+ /* We can't use push_warning_printf(), it is a hard error. */
+ my_printf_error(code, "%s", MYF(0), str);
+ break;
+ case IB_LOG_LEVEL_FATAL:
+ l = Sql_condition::WARN_LEVEL_END;
+ break;
+ }
+
+ if (level != IB_LOG_LEVEL_ERROR) {
+ push_warning_printf(thd, l, code, "InnoDB: %s", str);
+ }
+
+ va_end(args);
+ free(str);
+
+ if (level == IB_LOG_LEVEL_FATAL) {
+ ut_error;
+ }
+}
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+
+ /* If the caller wants to push a message to the client then
+ the caller must pass a valid session handle. */
+
+ ut_a(thd != 0);
+ ut_a(format != 0);
+
+ va_start(args, format);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ ib_senderrf(thd, level, code, str);
+
+ va_end(args);
+ free(str);
+}
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: " */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+ ib_log_level_t level, /*!< in: warning level */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+
+ va_start(args, format);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ switch(level) {
+ case IB_LOG_LEVEL_INFO:
+ sql_print_information("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_WARN:
+ sql_print_warning("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_ERROR:
+ sql_print_error("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_FATAL:
+ sql_print_error("InnoDB: %s", str);
+ break;
+ }
+
+ va_end(args);
+ free(str);
+
+ if (level == IB_LOG_LEVEL_FATAL) {
+ ut_error;
+ }
+}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index e56a1ec52e3..ece9f7cf58a 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -27,14 +27,14 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Structure defines translation table between mysql index and innodb
index structures */
-typedef struct innodb_idx_translate_struct {
+struct innodb_idx_translate_t {
ulint index_count; /*!< number of valid index entries
in the index_mapping array */
ulint array_size; /*!< array size of index_mapping */
dict_index_t** index_mapping; /*!< index pointer array directly
maps to index in Innodb from MySQL
array index */
-} innodb_idx_translate_t;
+};
/** InnoDB table share */
@@ -53,15 +53,8 @@ typedef struct st_innobase_share {
} INNOBASE_SHARE;
-/** InnoDB B-tree index */
-struct dict_index_struct;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-struct row_prebuilt_struct;
-
-/** InnoDB B-tree index */
-typedef struct dict_index_struct dict_index_t;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-typedef struct row_prebuilt_struct row_prebuilt_t;
+/** Prebuilt structures in an InnoDB table handle used within MySQL */
+struct row_prebuilt_t;
/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
@@ -101,15 +94,13 @@ class ha_innobase: public handler
void update_thd();
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
- ulint innobase_lock_autoinc();
+ dberr_t innobase_lock_autoinc();
ulonglong innobase_peek_autoinc();
- ulint innobase_set_max_autoinc(ulonglong auto_inc);
- ulint innobase_reset_autoinc(ulonglong auto_inc);
- ulint innobase_get_autoinc(ulonglong* value);
- ulint innobase_update_autoinc(ulonglong auto_inc);
+ dberr_t innobase_set_max_autoinc(ulonglong auto_inc);
+ dberr_t innobase_reset_autoinc(ulonglong auto_inc);
+ dberr_t innobase_get_autoinc(ulonglong* value);
void innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
- int info_low(uint flag, dict_stats_upd_option_t stats_upd_option);
/* Init values for the class: */
public:
@@ -132,9 +123,11 @@ class ha_innobase: public handler
const key_map* keys_to_use_for_scanning();
int open(const char *name, int mode, uint test_if_locked);
+ handler* clone(const char *name, MEM_ROOT *mem_root);
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
+ longlong get_memory_buffer_size() const;
int write_row(uchar * buf);
int update_row(const uchar * old_data, uchar * new_data);
@@ -182,6 +175,13 @@ class ha_innobase: public handler
ha_rows estimate_rows_upper_bound();
void update_create_info(HA_CREATE_INFO* create_info);
+ int parse_table_name(const char*name,
+ HA_CREATE_INFO* create_info,
+ ulint flags,
+ ulint flags2,
+ char* norm_name,
+ char* temp_path,
+ char* remote_path);
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int truncate();
@@ -219,13 +219,76 @@ class ha_innobase: public handler
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
- /** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
- int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys,
- handler_add_index **add);
- int final_add_index(handler_add_index *add, bool commit);
- int prepare_drop_index(TABLE *table_arg, uint *key_num,
- uint num_of_keys);
- int final_drop_index(TABLE *table_arg);
+ /** On-line ALTER TABLE interface @see handler0alter.cc @{ */
+
+ /** Check if InnoDB supports a particular alter table in-place
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+ @retval HA_ALTER_INPLACE_NO_LOCK Supported
+ @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
+ Supported, but requires lock
+ during main phase and exclusive
+ lock during prepare phase.
+ @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
+ Supported, prepare phase
+ requires exclusive lock.
+ */
+ enum_alter_inplace_result check_if_supported_inplace_alter(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+ /** Allows InnoDB to update internal structures with concurrent
+ writes blocked (provided that check_if_supported_inplace_alter()
+ did not return HA_ALTER_INPLACE_NO_LOCK).
+ This will be invoked before inplace_alter_table().
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval true Failure
+ @retval false Success
+ */
+ bool prepare_inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+
+ /** Alter the table structure in-place with operations
+ specified using HA_ALTER_FLAGS and Alter_inplace_information.
+ The level of concurrency allowed during this operation depends
+ on the return value from check_if_supported_inplace_alter().
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval true Failure
+ @retval false Success
+ */
+ bool inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+
+ /** Commit or rollback the changes made during
+ prepare_inplace_alter_table() and inplace_alter_table() inside
+ the storage engine. Note that the allowed level of concurrency
+ during this operation will be the same as for
+ inplace_alter_table() and thus might be higher than during
+ prepare_inplace_alter_table(). (E.g concurrent writes were
+ blocked during prepare, but might not be during commit).
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+ @param commit true => Commit, false => Rollback.
+ @retval true Failure
+ @retval false Success
+ */
+ bool commit_inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ bool commit);
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
@@ -241,6 +304,8 @@ private:
@see build_template() */
inline void reset_template();
+ int info_low(uint, bool);
+
public:
/** @name Multi Range Read interface @{ */
/** Initialize multi range read @see DsMrr_impl::dsmrr_init
@@ -283,15 +348,12 @@ public:
* @param flags
* @param cost
*/
- ha_rows multi_range_read_info(uint keyno,
- uint n_ranges, uint keys,
- uint key_parts,
- uint* bufsz, uint* mrr_mode,
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint* bufsz, uint* flags,
Cost_estimate* cost);
- int multi_range_read_explain_info(uint mrr_mode,
- char *str, size_t size);
-
+ int multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size);
/** Attempt to push down an index condition.
* @param[in] keyno MySQL key number
* @param[in] idx_cond Index condition to be checked
@@ -364,6 +426,27 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
*/
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
+/**
+ Gets information on the durability property requested by
+ a thread.
+ @param thd Thread handle
+ @return a durability property.
+*/
+enum durability_properties thd_get_durability_property(const MYSQL_THD thd);
+
+/** Get the auto_increment_offset auto_increment_increment.
+@param thd Thread object
+@param off auto_increment_offset
+@param inc auto_increment_increment */
+void thd_get_autoinc(const MYSQL_THD thd, ulong* off, ulong* inc)
+__attribute__((nonnull));
+
+/** Is strict sql_mode set.
+@param thd Thread object
+@return True if sql_mode has strict mode (all or trans), false otherwise.
+*/
+bool thd_is_strict_mode(const MYSQL_THD thd)
+__attribute__((nonnull));
} /* extern "C" */
/** Get the file name and position of the MySQL binlog corresponding to the
@@ -371,7 +454,7 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
-typedef struct trx_struct trx_t;
+struct trx_t;
extern const struct _ft_vft ft_vft_result;
@@ -379,23 +462,11 @@ extern const struct _ft_vft ft_vft_result;
typedef struct new_ft_info
{
struct _ft_vft *please;
+ struct _ft_vft_ext *could_you;
row_prebuilt_t* ft_prebuilt;
fts_result_t* ft_result;
} NEW_FT_INFO;
-/********************************************************************//**
-@file handler/ha_innodb.h
-Converts an InnoDB error code to a MySQL error code and also tells to MySQL
-about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock.
-@return MySQL error code */
-int
-convert_error_code_to_mysql(
-/*========================*/
- int error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
- MYSQL_THD thd); /*!< in: user thread handle or NULL */
-
/*********************************************************************//**
Allocates an InnoDB transaction for a MySQL handler object.
@return InnoDB transaction handle */
@@ -410,13 +481,50 @@ system default primary index name 'GEN_CLUST_INDEX'. If a name
matches, this function pushes an warning message to the client,
and returns true.
@return true if the index name matches the reserved name */
+UNIV_INTERN
bool
innobase_index_name_is_reserved(
/*============================*/
THD* thd, /*!< in/out: MySQL connection */
const KEY* key_info, /*!< in: Indexes to be created */
- ulint num_of_keys); /*!< in: Number of indexes to
+ ulint num_of_keys) /*!< in: Number of indexes to
be created. */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Determines InnoDB table flags.
+@retval true if successful, false if error */
+UNIV_INTERN
+bool
+innobase_table_flags(
+/*=================*/
+ const TABLE* form, /*!< in: table */
+ const HA_CREATE_INFO* create_info, /*!< in: information
+ on table columns and indexes */
+ THD* thd, /*!< in: connection */
+ bool use_tablespace, /*!< in: whether to create
+ outside system tablespace */
+ ulint* flags, /*!< out: DICT_TF flags */
+ ulint* flags2) /*!< out: DICT_TF2 flags */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Validates the create options. We may build on this function
+in future. For now, it checks two specifiers:
+KEY_BLOCK_SIZE and ROW_FORMAT
+If innodb_strict_mode is not set then this function is a no-op
+@return NULL if valid, string if not. */
+UNIV_INTERN
+const char*
+create_options_are_invalid(
+/*=======================*/
+ THD* thd, /*!< in: connection thread. */
+ TABLE* form, /*!< in: information on table
+ columns and indexes */
+ HA_CREATE_INFO* create_info, /*!< in: create info. */
+ bool use_tablespace) /*!< in: srv_file_per_table */
+ __attribute__((nonnull, warn_unused_result));
+
/*********************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
of prebuilt->fts_doc_id
@@ -434,7 +542,7 @@ of prebuilt->fts_doc_id
UNIV_INTERN
float
innobase_fts_find_ranking(
-/*==========================*/
+/*======================*/
FT_INFO* fts_hdl, /*!< in: FTS handler */
uchar* record, /*!< in: Unused */
uint len); /*!< in: Unused */
@@ -443,24 +551,20 @@ Free the memory for the FTS handler */
UNIV_INTERN
void
innobase_fts_close_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
-/*********************************************************************//**
-Free the memory for the FTS handler */
-void
-innobase_fts_close_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
+/*=======================*/
+ FT_INFO* fts_hdl) /*!< in: FTS handler */
+ __attribute__((nonnull));
/*****************************************************************//**
Initialize the table FTS stopword list
-@return TRUE is succeed */
+@return TRUE if success */
UNIV_INTERN
ibool
innobase_fts_load_stopword(
/*=======================*/
dict_table_t* table, /*!< in: Table has the FTS */
trx_t* trx, /*!< in: transaction */
- THD* thd); /*!< in: current thread */
+ THD* thd) /*!< in: current thread */
+ __attribute__((nonnull(1,3), warn_unused_result));
/** Some defines for innobase_fts_check_doc_id_index() return value */
enum fts_doc_id_index_enum {
@@ -472,15 +576,17 @@ enum fts_doc_id_index_enum {
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
+@return the status of the FTS_DOC_ID index */
UNIV_INTERN
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
/*============================*/
- dict_table_t* table, /*!< in: table definition */
- ulint* fts_doc_col_no);/*!< out: The column number for
- Doc ID */
+ const dict_table_t* table, /*!< in: table definition */
+ const TABLE* altered_table, /*!< in: MySQL table
+ that is being altered */
+ ulint* fts_doc_col_no) /*!< out: The column number for
+ Doc ID */
+ __attribute__((warn_unused_result));
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
@@ -492,4 +598,59 @@ enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
/*===================================*/
ulint n_key, /*!< in: Number of keys */
- KEY* key_info); /*!< in: Key definition */
+ const KEY* key_info) /*!< in: Key definitions */
+ __attribute__((nonnull, warn_unused_result));
+
+/***********************************************************************
+@return version of the extended FTS API */
+uint
+innobase_fts_get_version();
+
+/***********************************************************************
+@return Which part of the extended FTS API is supported */
+ulonglong
+innobase_fts_flags();
+
+/***********************************************************************
+Find and Retrieve the FTS doc_id for the current result row
+@return the document ID */
+ulonglong
+innobase_fts_retrieve_docid(
+/*============================*/
+ FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
+
+/***********************************************************************
+Find and retrieve the size of the current result
+@return number of matching rows */
+ulonglong
+innobase_fts_count_matches(
+/*============================*/
+ FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
+
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+extern const char innobase_index_reserve_name[];
+
+/*********************************************************************//**
+Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_create_info(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ HA_CREATE_INFO* create_info); /*!< in: create info */
+
+/*********************************************************************//**
+Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_table_share(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ TABLE_SHARE* table_share); /*!< in: table share */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 1468bc79c04..437443979c0 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -23,11 +23,20 @@ Smart ALTER TABLE
#include <unireg.h>
#include <mysqld_error.h>
-#include <sql_lex.h> // SQLCOM_CREATE_INDEX
+#include <log.h>
+#include <debug_sync.h>
#include <innodb_priv.h>
+#include <sql_alter.h>
+#include <sql_class.h>
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0priv.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "log0log.h"
+#include "rem0types.h"
+#include "row0log.h"
#include "row0merge.h"
#include "srv0srv.h"
#include "trx0trx.h"
@@ -36,9 +45,995 @@ Smart ALTER TABLE
#include "handler0alter.h"
#include "srv0mon.h"
#include "fts0priv.h"
+#include "pars0pars.h"
#include "ha_innodb.h"
+/** Operations for creating an index in place */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
+ = Alter_inplace_info::ADD_INDEX
+ | Alter_inplace_info::ADD_UNIQUE_INDEX;
+
+/** Operations for rebuilding a table in place */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_REBUILD
+ = Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX
+ | Alter_inplace_info::CHANGE_CREATE_OPTION
+ | Alter_inplace_info::ALTER_COLUMN_NULLABLE
+ | Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
+ | Alter_inplace_info::ALTER_COLUMN_ORDER
+ | Alter_inplace_info::DROP_COLUMN
+ | Alter_inplace_info::ADD_COLUMN
+ /*
+ | Alter_inplace_info::ALTER_COLUMN_TYPE
+ | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ */
+ ;
+
+/** Operations for creating indexes or rebuilding a table */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_CREATE
+ = INNOBASE_ONLINE_CREATE | INNOBASE_INPLACE_REBUILD;
+
+/** Operations for altering a table that InnoDB does not care about */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
+ = Alter_inplace_info::ALTER_COLUMN_DEFAULT
+ | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
+ | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
+ | Alter_inplace_info::ALTER_RENAME;
+
+/** Operations that InnoDB can perform online */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_OPERATIONS
+ = INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ONLINE_CREATE
+ | Alter_inplace_info::DROP_INDEX
+ | Alter_inplace_info::DROP_UNIQUE_INDEX
+ | Alter_inplace_info::DROP_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME
+ | Alter_inplace_info::ADD_FOREIGN_KEY;
+
+/* Report an InnoDB error to the client by invoking my_error(). */
+static UNIV_COLD __attribute__((nonnull))
+void
+my_error_innodb(
+/*============*/
+ dberr_t error, /*!< in: InnoDB error code */
+ const char* table, /*!< in: table name */
+ ulint flags) /*!< in: table flags */
+{
+ switch (error) {
+ case DB_MISSING_HISTORY:
+ my_error(ER_TABLE_DEF_CHANGED, MYF(0));
+ break;
+ case DB_RECORD_NOT_FOUND:
+ my_error(ER_KEY_NOT_FOUND, MYF(0), table);
+ break;
+ case DB_DEADLOCK:
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ break;
+ case DB_LOCK_WAIT_TIMEOUT:
+ my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
+ break;
+ case DB_INTERRUPTED:
+ my_error(ER_QUERY_INTERRUPTED, MYF(0));
+ break;
+ case DB_OUT_OF_MEMORY:
+ my_error(ER_OUT_OF_RESOURCES, MYF(0));
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ my_error(ER_RECORD_FILE_FULL, MYF(0), table);
+ break;
+ case DB_TOO_BIG_INDEX_COL:
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
+ break;
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0));
+ break;
+ case DB_LOCK_TABLE_FULL:
+ my_error(ER_LOCK_TABLE_FULL, MYF(0));
+ break;
+ case DB_UNDO_RECORD_TOO_BIG:
+ my_error(ER_UNDO_RECORD_TOO_BIG, MYF(0));
+ break;
+ case DB_CORRUPTION:
+ my_error(ER_NOT_KEYFILE, MYF(0), table);
+ break;
+ case DB_TOO_BIG_RECORD:
+ my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
+ page_get_free_space_of_empty(
+ flags & DICT_TF_COMPACT) / 2);
+ break;
+ case DB_INVALID_NULL:
+ /* TODO: report the row, as we do for DB_DUPLICATE_KEY */
+ my_error(ER_INVALID_USE_OF_NULL, MYF(0));
+ break;
+#ifdef UNIV_DEBUG
+ case DB_SUCCESS:
+ case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
+ case DB_ONLINE_LOG_TOO_BIG:
+ /* These codes should not be passed here. */
+ ut_error;
+#endif /* UNIV_DEBUG */
+ default:
+ my_error(ER_GET_ERRNO, MYF(0), error);
+ break;
+ }
+}
+
+/** Determine if fulltext indexes exist in a given table.
+@param table_share MySQL table
+@return whether fulltext indexes exist on the table */
+static
+bool
+innobase_fulltext_exist(
+/*====================*/
+ const TABLE_SHARE* table_share)
+{
+ for (uint i = 0; i < table_share->keys; i++) {
+ if (table_share->key_info[i].flags & HA_FULLTEXT) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/*******************************************************************//**
+Determine if ALTER TABLE needs to rebuild the table.
+@param ha_alter_info the DDL operation
+@return whether it is necessary to rebuild the table */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_need_rebuild(
+/*==================*/
+ const Alter_inplace_info* ha_alter_info)
+{
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !(ha_alter_info->create_info->used_fields
+ & (HA_CREATE_USED_ROW_FORMAT
+ | HA_CREATE_USED_KEY_BLOCK_SIZE))) {
+ /* Any other CHANGE_CREATE_OPTION than changing
+ ROW_FORMAT or KEY_BLOCK_SIZE is ignored. */
+ return(false);
+ }
+
+ return(!!(ha_alter_info->handler_flags & INNOBASE_INPLACE_REBUILD));
+}
+
+/** Check if InnoDB supports a particular alter table in-place
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+@retval HA_ALTER_INPLACE_NO_LOCK Supported
+@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires
+lock during main phase and exclusive lock during prepare phase.
+@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
+requires exclusive lock (any transactions that have accessed the table
+must commit or roll back first, and no transactions can access the table
+while prepare_inplace_alter_table() is executing)
+*/
+UNIV_INTERN
+enum_alter_inplace_result
+ha_innobase::check_if_supported_inplace_alter(
+/*==========================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
+{
+ DBUG_ENTER("check_if_supported_inplace_alter");
+
+ if (srv_read_only_mode) {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ } else if (srv_created_new_raw || srv_force_recovery) {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ if (altered_table->s->fields > REC_MAX_N_USER_FIELDS) {
+ /* Deny the inplace ALTER TABLE. MySQL will try to
+ re-create the table and ha_innobase::create() will
+ return an error too. This is how we effectively
+ deny adding too many columns to a table. */
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_TOO_MANY_FIELDS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ update_thd();
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+
+ if (ha_alter_info->handler_flags
+ & ~(INNOBASE_ONLINE_OPERATIONS | INNOBASE_INPLACE_REBUILD)) {
+ if (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | Alter_inplace_info::ALTER_COLUMN_TYPE))
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* Only support online add foreign key constraint when
+ check_foreigns is turned off */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY)
+ && prebuilt->trx->check_foreigns) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
+ }
+
+ /* Only support NULL -> NOT NULL change if strict table sql_mode
+ is set. Fall back to COPY for conversion if not strict tables.
+ In-Place will fail with an error when trying to convert
+ NULL to a NOT NULL value. */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE)
+ && !thd_is_strict_mode(user_thd)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* InnoDB cannot IGNORE when creating unique indexes. IGNORE
+ should silently delete some duplicate rows. Our inplace_alter
+ code will not delete anything from existing indexes. */
+ if (ha_alter_info->ignore
+ && (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::ADD_UNIQUE_INDEX))) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* DROP PRIMARY KEY is only allowed in combination with ADD
+ PRIMARY KEY. */
+ if ((ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX))
+ == Alter_inplace_info::DROP_PK_INDEX) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* ADD FOREIGN KEY does not currently work properly in combination
+ with renaming columns. (Bug#14105491) */
+ if ((ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME))
+ == (Alter_inplace_info::ADD_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* DROP FOREIGN KEY may not currently work properly in combination
+ with other operations. (Work-around for 5.6.10 only.) */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)
+ && (ha_alter_info->handler_flags
+ & (Alter_inplace_info::DROP_FOREIGN_KEY
+ | INNOBASE_INPLACE_REBUILD))
+ != Alter_inplace_info::DROP_FOREIGN_KEY) {
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+	/* If a column changes from NOT NULL to NULL,
+	and there's an implicit pk on this column, the
+	table should be rebuilt. The change should
+	only go through the "Copy" method. */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
+ uint primary_key = altered_table->s->primary_key;
+
+ /* See if MYSQL table has no pk but we do.*/
+ if (UNIV_UNLIKELY(primary_key >= MAX_KEY)
+ && !row_table_got_default_clust_index(prebuilt->table)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_PRIMARY_CANT_HAVE_NULL);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
+ /* We should be able to do the operation in-place.
+ See if we can do it online (LOCK=NONE). */
+ bool online = true;
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ /* Fix the key parts. */
+ for (KEY* new_key = ha_alter_info->key_info_buffer;
+ new_key < ha_alter_info->key_info_buffer
+ + ha_alter_info->key_count;
+ new_key++) {
+ for (KEY_PART_INFO* key_part = new_key->key_part;
+ key_part < new_key->key_part + new_key->user_defined_key_parts;
+ key_part++) {
+ const Create_field* new_field;
+
+ DBUG_ASSERT(key_part->fieldnr
+ < altered_table->s->fields);
+
+ cf_it.rewind();
+ for (uint fieldnr = 0; (new_field = cf_it++);
+ fieldnr++) {
+ if (fieldnr == key_part->fieldnr) {
+ break;
+ }
+ }
+
+ DBUG_ASSERT(new_field);
+
+ key_part->field = altered_table->field[
+ key_part->fieldnr];
+ /* In some special cases InnoDB emits "false"
+ duplicate key errors with NULL key values. Let
+ us play safe and ensure that we can correctly
+ print key values even in such cases .*/
+ key_part->null_offset = key_part->field->null_offset();
+ key_part->null_bit = key_part->field->null_bit;
+
+ if (new_field->field) {
+ /* This is an existing column. */
+ continue;
+ }
+
+ /* This is an added column. */
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN);
+
+ /* We cannot replace a hidden FTS_DOC_ID
+ with a user-visible FTS_DOC_ID. */
+ if (prebuilt->table->fts
+ && innobase_fulltext_exist(altered_table->s)
+ && !my_strcasecmp(
+ system_charset_info,
+ key_part->field->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ DBUG_ASSERT((MTYP_TYPENR(key_part->field->unireg_check)
+ == Field::NEXT_NUMBER)
+ == !!(key_part->field->flags
+ & AUTO_INCREMENT_FLAG));
+
+ if (key_part->field->flags & AUTO_INCREMENT_FLAG) {
+ /* We cannot assign an AUTO_INCREMENT
+ column values during online ALTER. */
+ DBUG_ASSERT(key_part->field == altered_table
+ -> found_next_number_field);
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
+ online = false;
+ }
+ }
+ }
+
+ DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
+ <= table->s->fields);
+ DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
+ < dict_table_get_n_user_cols(prebuilt->table));
+
+ if (prebuilt->table->fts
+ && innobase_fulltext_exist(altered_table->s)) {
+ /* FULLTEXT indexes are supposed to remain. */
+ /* Disallow DROP INDEX FTS_DOC_ID_INDEX */
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ if (!my_strcasecmp(
+ system_charset_info,
+ ha_alter_info->index_drop_buffer[i]->name,
+ FTS_DOC_ID_INDEX_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
+ /* InnoDB can have a hidden FTS_DOC_ID_INDEX on a
+ visible FTS_DOC_ID column as well. Prevent dropping or
+ renaming the FTS_DOC_ID. */
+
+ for (Field** fp = table->field; *fp; fp++) {
+ if (!((*fp)->flags
+ & (FIELD_IS_RENAMED | FIELD_IS_DROPPED))) {
+ continue;
+ }
+
+ if (!my_strcasecmp(
+ system_charset_info,
+ (*fp)->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+ }
+
+ prebuilt->trx->will_lock++;
+
+ if (!online) {
+ /* We already determined that only a non-locking
+ operation is possible. */
+ } else if (((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX)
+ || innobase_need_rebuild(ha_alter_info))
+ && (innobase_fulltext_exist(altered_table->s)
+ || (prebuilt->table->flags2
+ & DICT_TF2_FTS_HAS_DOC_ID))) {
+ /* Refuse to rebuild the table online, if
+ fulltext indexes are to survive the rebuild,
+ or if the table contains a hidden FTS_DOC_ID column. */
+ online = false;
+ /* If the table already contains fulltext indexes,
+ refuse to rebuild the table natively altogether. */
+ if (prebuilt->table->fts) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_INNODB_FT_LIMIT);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ } else if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_INDEX)) {
+ /* Building a full-text index requires a lock.
+ We could do without a lock if the table already contains
+ an FTS_DOC_ID column, but in that case we would have
+ to apply the modification log to the full-text indexes. */
+
+ for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY* key =
+ &ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[i]];
+ if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_GENERATED_KEY
+ | HA_BINARY_PACK_KEY)));
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ online = false;
+ break;
+ }
+ }
+ }
+
+ DBUG_RETURN(online
+ ? HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
+ : HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
+}
+
+/*************************************************************//**
+Initialize the dict_foreign_t structure with supplied info
+@return true if added, false if duplicate foreign->id */
+static __attribute__((nonnull(1,3,5,7)))
+bool
+innobase_init_foreign(
+/*==================*/
+ dict_foreign_t* foreign, /*!< in/out: structure to
+ initialize */
+ char* constraint_name, /*!< in/out: constraint name if
+ exists */
+ dict_table_t* table, /*!< in: foreign table */
+ dict_index_t* index, /*!< in: foreign key index */
+ const char** column_names, /*!< in: foreign key column
+ names */
+ ulint num_field, /*!< in: number of columns */
+ const char* referenced_table_name, /*!< in: referenced table
+ name */
+ dict_table_t* referenced_table, /*!< in: referenced table */
+ dict_index_t* referenced_index, /*!< in: referenced index */
+ const char** referenced_column_names,/*!< in: referenced column
+ names */
+ ulint referenced_num_field) /*!< in: number of referenced
+ columns */
+{
+ if (constraint_name) {
+ ulint db_len;
+
+ /* Catenate 'databasename/' to the constraint name specified
+ by the user: we conceive the constraint as belonging to the
+ same MySQL 'database' as the table itself. We store the name
+ to foreign->id. */
+
+ db_len = dict_get_db_name_len(table->name);
+
+ foreign->id = static_cast<char*>(mem_heap_alloc(
+ foreign->heap, db_len + strlen(constraint_name) + 2));
+
+ ut_memcpy(foreign->id, table->name, db_len);
+ foreign->id[db_len] = '/';
+ strcpy(foreign->id + db_len + 1, constraint_name);
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* Check if any existing foreign key has the same id */
+
+ for (const dict_foreign_t* existing_foreign
+ = UT_LIST_GET_FIRST(table->foreign_list);
+ existing_foreign != 0;
+ existing_foreign = UT_LIST_GET_NEXT(
+ foreign_list, existing_foreign)) {
+
+ if (ut_strcmp(existing_foreign->id, foreign->id) == 0) {
+ return(false);
+ }
+ }
+
+ foreign->foreign_table = table;
+ foreign->foreign_table_name = mem_heap_strdup(
+ foreign->heap, table->name);
+ dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
+
+ foreign->foreign_index = index;
+ foreign->n_fields = (unsigned int) num_field;
+
+ foreign->foreign_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap, num_field * sizeof(void*)));
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ foreign->foreign_col_names[i] = mem_heap_strdup(
+ foreign->heap, column_names[i]);
+ }
+
+ foreign->referenced_index = referenced_index;
+ foreign->referenced_table = referenced_table;
+
+ foreign->referenced_table_name = mem_heap_strdup(
+ foreign->heap, referenced_table_name);
+ dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
+
+ foreign->referenced_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap,
+ referenced_num_field * sizeof(void*)));
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ foreign->referenced_col_names[i]
+ = mem_heap_strdup(foreign->heap,
+ referenced_column_names[i]);
+ }
+
+ return(true);
+}
+
+/*************************************************************//**
+Check whether the foreign key options is legit
+@return true if it is */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_check_fk_option(
+/*=====================*/
+ dict_foreign_t* foreign) /*!< in:InnoDB Foreign key */
+{
+ if (foreign->type & (DICT_FOREIGN_ON_UPDATE_SET_NULL
+ | DICT_FOREIGN_ON_DELETE_SET_NULL)
+ && foreign->foreign_index) {
+
+ for (ulint j = 0; j < foreign->n_fields; j++) {
+ if ((dict_index_get_nth_col(
+ foreign->foreign_index, j)->prtype)
+ & DATA_NOT_NULL) {
+
+ /* It is not sensible to define
+ SET NULL if the column is not
+ allowed to be NULL! */
+ return(false);
+ }
+ }
+ }
+
+ return(true);
+}
+
+/*************************************************************//**
+Set foreign key options
+@return true if successfully set */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_set_foreign_key_option(
+/*============================*/
+ dict_foreign_t* foreign, /*!< in:InnoDB Foreign key */
+ Foreign_key* fk_key) /*!< in: Foreign key info from
+ MySQL */
+{
+ ut_ad(!foreign->type);
+
+ switch (fk_key->delete_opt) {
+ case Foreign_key::FK_OPTION_NO_ACTION:
+ case Foreign_key::FK_OPTION_RESTRICT:
+ case Foreign_key::FK_OPTION_DEFAULT:
+ foreign->type = DICT_FOREIGN_ON_DELETE_NO_ACTION;
+ break;
+ case Foreign_key::FK_OPTION_CASCADE:
+ foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE;
+ break;
+ case Foreign_key::FK_OPTION_SET_NULL:
+ foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL;
+ break;
+ }
+
+ switch (fk_key->update_opt) {
+ case Foreign_key::FK_OPTION_NO_ACTION:
+ case Foreign_key::FK_OPTION_RESTRICT:
+ case Foreign_key::FK_OPTION_DEFAULT:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
+ break;
+ case Foreign_key::FK_OPTION_CASCADE:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
+ break;
+ case Foreign_key::FK_OPTION_SET_NULL:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
+ break;
+ }
+
+ return(innobase_check_fk_option(foreign));
+}
+
+/*******************************************************************//**
+Check if a foreign key constraint can make use of an index
+that is being created.
+@return useable index, or NULL if none found */
+static __attribute__((nonnull, warn_unused_result))
+const KEY*
+innobase_find_equiv_index(
+/*======================*/
+ const char*const* col_names,
+ /*!< in: column names */
+ uint n_cols, /*!< in: number of columns */
+ const KEY* keys, /*!< in: index information */
+ const uint* add, /*!< in: indexes being created */
+ uint n_add) /*!< in: number of indexes to create */
+{
+ for (uint i = 0; i < n_add; i++) {
+ const KEY* key = &keys[add[i]];
+
+ if (key->user_defined_key_parts < n_cols) {
+no_match:
+ continue;
+ }
+
+ for (uint j = 0; j < n_cols; j++) {
+ const KEY_PART_INFO& key_part = key->key_part[j];
+ uint32 col_len
+ = key_part.field->pack_length();
+
+ /* The MySQL pack length contains 1 or 2 bytes
+ length field for a true VARCHAR. */
+
+ if (key_part.field->type() == MYSQL_TYPE_VARCHAR) {
+ col_len -= static_cast<const Field_varstring*>(
+ key_part.field)->length_bytes;
+ }
+
+ if (key_part.length < col_len) {
+
+ /* Column prefix indexes cannot be
+ used for FOREIGN KEY constraints. */
+ goto no_match;
+ }
+
+ if (innobase_strcasecmp(col_names[j],
+ key_part.field->field_name)) {
+ /* Name mismatch */
+ goto no_match;
+ }
+ }
+
+ return(key);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************//**
+Found an index whose first fields are the columns in the array
+in the same order and is not marked for deletion
+@return matching index, NULL if not found */
+static
+dict_index_t*
+innobase_find_fk_index(
+/*===================*/
+ Alter_inplace_info* ha_alter_info,
+ /*!< in: alter table info */
+ dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols) /*!< in: number of columns */
+
+{
+ dict_index_t* index;
+ dict_index_t* found_index = NULL;
+
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (index->type & DICT_FTS) {
+ goto next_rec;
+ } else if (dict_foreign_qualify_index(
+ table, columns, n_cols, index, NULL, TRUE, FALSE)) {
+ /* Check if this index is in the drop list */
+ if (index) {
+ KEY** drop_key;
+
+ drop_key = ha_alter_info->index_drop_buffer;
+
+ for (uint i = 0;
+ i < ha_alter_info->index_drop_count;
+ i++) {
+ if (innobase_strcasecmp(
+ drop_key[i]->name,
+ index->name) == 0) {
+ goto next_rec;
+ }
+ }
+ }
+
+ found_index = index;
+ break;
+ }
+
+next_rec:
+ index = dict_table_get_next_index(index);
+ }
+
+ return(found_index);
+}
+
+/*************************************************************//**
+Create InnoDB foreign key structure from MySQL alter_info
+@retval true if successful
+@retval false on error (will call my_error()) */
+static
+bool
+innobase_get_foreign_key_info(
+/*==========================*/
+ Alter_inplace_info*
+ ha_alter_info, /*!< in: alter table info */
+ const TABLE_SHARE*
+ table_share, /*!< in: the TABLE_SHARE */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t**add_fk, /*!< out: foreign constraint added */
+ ulint* n_add_fk, /*!< out: number of foreign
+ constraints added */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const trx_t* trx) /*!< in: user transaction */
+{
+ Key* key;
+ Foreign_key* fk_key;
+ ulint i = 0;
+ dict_table_t* referenced_table = NULL;
+ char* referenced_table_name = NULL;
+ ulint num_fk = 0;
+ Alter_info* alter_info = ha_alter_info->alter_info;
+
+ *n_add_fk = 0;
+
+ List_iterator<Key> key_iterator(alter_info->key_list);
+
+ while ((key=key_iterator++)) {
+ if (key->type == Key::FOREIGN_KEY) {
+ const char* column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* index = NULL;
+ const char* referenced_column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* referenced_index = NULL;
+ ulint num_col = 0;
+ ulint referenced_num_col = 0;
+ bool correct_option;
+ char* db_namep = NULL;
+ char* tbl_namep = NULL;
+ ulint db_name_len = 0;
+ ulint tbl_name_len = 0;
+#ifdef __WIN__
+ char db_name[MAX_DATABASE_NAME_LEN];
+ char tbl_name[MAX_TABLE_NAME_LEN];
+#endif
+
+ fk_key= static_cast<Foreign_key*>(key);
+
+ if (fk_key->columns.elements > 0) {
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->columns);
+
+ /* Get all the foreign key column info for the
+ current table */
+ while ((column = key_part_iterator++)) {
+ column_names[i] =
+ column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
+
+ index = innobase_find_fk_index(
+ ha_alter_info, table, column_names, i);
+
+				/* MySQL would add an index to the
+				creation list if no such index exists
+				for the foreign table, so we have to use
+				DBUG_EXECUTE_IF to simulate the scenario */
+ DBUG_EXECUTE_IF("innodb_test_no_foreign_idx",
+ index = NULL;);
+
+				/* Check whether such an index exists
+				in the index create clause */
+ if (!index && !innobase_find_equiv_index(
+ column_names, i,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)) {
+ my_error(
+ ER_FK_NO_INDEX_CHILD,
+ MYF(0),
+ fk_key->name.str,
+ table_share->table_name.str);
+ goto err_exit;
+ }
+
+ num_col = i;
+ }
+
+ add_fk[num_fk] = dict_mem_foreign_create();
+
+#ifndef __WIN__
+ tbl_namep = fk_key->ref_table.str;
+ tbl_name_len = fk_key->ref_table.length;
+ db_namep = fk_key->ref_db.str;
+ db_name_len = fk_key->ref_db.length;
+#else
+ ut_ad(fk_key->ref_table.str);
+
+ memcpy(tbl_name, fk_key->ref_table.str,
+ fk_key->ref_table.length);
+ tbl_name[fk_key->ref_table.length] = 0;
+ innobase_casedn_str(tbl_name);
+ tbl_name_len = strlen(tbl_name);
+ tbl_namep = &tbl_name[0];
+
+ if (fk_key->ref_db.str != NULL) {
+ memcpy(db_name, fk_key->ref_db.str,
+ fk_key->ref_db.length);
+ db_name[fk_key->ref_db.length] = 0;
+ innobase_casedn_str(db_name);
+ db_name_len = strlen(db_name);
+ db_namep = &db_name[0];
+ }
+#endif
+ mutex_enter(&dict_sys->mutex);
+
+ referenced_table_name = dict_get_referenced_table(
+ table->name,
+ db_namep,
+ db_name_len,
+ tbl_namep,
+ tbl_name_len,
+ &referenced_table,
+ add_fk[num_fk]->heap);
+
+ /* Test the case when referenced_table failed to
+ open, if trx->check_foreigns is not set, we should
+ still be able to add the foreign key */
+ DBUG_EXECUTE_IF("innodb_test_open_ref_fail",
+ referenced_table = NULL;);
+
+ if (!referenced_table && trx->check_foreigns) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(ER_FK_CANNOT_OPEN_PARENT,
+ MYF(0), tbl_namep);
+
+ goto err_exit;
+ }
+
+ i = 0;
+
+ if (fk_key->ref_columns.elements > 0) {
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->ref_columns);
+
+ while ((column = key_part_iterator++)) {
+ referenced_column_names[i] =
+ column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
+
+ if (referenced_table) {
+ referenced_index =
+ dict_foreign_find_index(
+ referenced_table,
+ referenced_column_names,
+ i, NULL,
+ TRUE, FALSE);
+
+ DBUG_EXECUTE_IF(
+ "innodb_test_no_reference_idx",
+ referenced_index = NULL;);
+
+					/* Check whether such an index exists
+					in the index create clause */
+ if (!referenced_index) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(
+ ER_FK_NO_INDEX_PARENT,
+ MYF(0),
+ fk_key->name.str,
+ tbl_namep);
+ goto err_exit;
+ }
+ } else {
+ ut_a(!trx->check_foreigns);
+ }
+
+ referenced_num_col = i;
+ }
+
+ if (!innobase_init_foreign(
+ add_fk[num_fk], fk_key->name.str,
+ table, index, column_names,
+ num_col, referenced_table_name,
+ referenced_table, referenced_index,
+ referenced_column_names, referenced_num_col)) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(
+ ER_FK_DUP_NAME,
+ MYF(0),
+ add_fk[num_fk]->id);
+ goto err_exit;
+ }
+
+ mutex_exit(&dict_sys->mutex);
+
+ correct_option = innobase_set_foreign_key_option(
+ add_fk[num_fk], fk_key);
+
+ DBUG_EXECUTE_IF("innodb_test_wrong_fk_option",
+ correct_option = false;);
+
+ if (!correct_option) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0),
+ table_share->table_name.str,
+ add_fk[num_fk]->id);
+ goto err_exit;
+ }
+
+ num_fk++;
+ i = 0;
+ }
+
+ }
+
+ *n_add_fk = num_fk;
+
+ return(true);
+err_exit:
+ for (i = 0; i <= num_fk; i++) {
+ if (add_fk[i]) {
+ dict_foreign_free(add_fk[i]);
+ }
+ }
+
+ return(false);
+}
+
/*************************************************************//**
Copies an InnoDB column to a MySQL field. This function is
adapted from row_sel_field_store_in_mysql_format(). */
@@ -91,10 +1086,9 @@ innobase_col_to_mysql(
break;
case DATA_BLOB:
- /* Store a pointer to the BLOB buffer to dest: the BLOB was
- already copied to the buffer in row_sel_store_mysql_rec */
-
- row_mysql_store_blob_ref(dest, flen, data, len);
+ /* Skip MySQL BLOBs when reporting an erroneous row
+ during index creation or table rebuild. */
+ field->set_null();
break;
#ifdef UNIV_DEBUG
@@ -102,8 +1096,6 @@ innobase_col_to_mysql(
ut_ad(flen >= len);
ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
>= DATA_MBMINLEN(col->mbminmaxlen));
- ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
- > DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
memcpy(dest, data, len);
break;
@@ -137,20 +1129,19 @@ UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
- TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(
- rec, index, ...) */
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(
+ rec, index, ...) */
{
uint n_fields = table->s->fields;
- uint i;
ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
- || (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_FTS_HAS_DOC_ID)
- && n_fields + 1 == dict_table_get_n_user_cols(index->table)));
+ - !!(DICT_TF2_FLAG_IS_SET(index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)));
- for (i = 0; i < n_fields; i++) {
+ for (uint i = 0; i < n_fields; i++) {
Field* field = table->field[i];
ulint ipos;
ulint ilen;
@@ -160,7 +1151,8 @@ innobase_rec_to_mysql(
ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
- if (UNIV_UNLIKELY(ipos == ULINT_UNDEFINED)) {
+ if (ipos == ULINT_UNDEFINED
+ || rec_offs_nth_extern(offsets, ipos)) {
null_field:
field->set_null();
continue;
@@ -184,6 +1176,85 @@ null_field:
}
/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_index_t* index, /*!< in: InnoDB index */
+ const dfield_t* fields) /*!< in: InnoDB index fields */
+{
+ uint n_fields = table->s->fields;
+
+ ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
+ - !!(DICT_TF2_FLAG_IS_SET(index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)));
+
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
+ ulint ipos;
+
+ field->reset();
+
+ ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
+
+ if (ipos == ULINT_UNDEFINED
+ || dfield_is_ext(&fields[ipos])
+ || dfield_is_null(&fields[ipos])) {
+
+ field->set_null();
+ } else {
+ field->set_notnull();
+
+ const dfield_t* df = &fields[ipos];
+
+ innobase_col_to_mysql(
+ dict_field_get_col(
+ dict_index_get_nth_field(index, ipos)),
+ static_cast<const uchar*>(dfield_get_data(df)),
+ dfield_get_len(df), field);
+ }
+ }
+}
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_table_t* itab, /*!< in: InnoDB table */
+ const dtuple_t* row) /*!< in: InnoDB row */
+{
+ uint n_fields = table->s->fields;
+
+ /* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */
+ ut_ad(row->n_fields == dict_table_get_n_cols(itab));
+ ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS
+ - !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID)));
+
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
+ const dfield_t* df = dtuple_get_nth_field(row, i);
+
+ field->reset();
+
+ if (dfield_is_ext(df) || dfield_is_null(df)) {
+ field->set_null();
+ } else {
+ field->set_notnull();
+
+ innobase_col_to_mysql(
+ dict_table_get_nth_col(itab, i),
+ static_cast<const uchar*>(dfield_get_data(df)),
+ dfield_get_len(df), field);
+ }
+ }
+}
+
+/*************************************************************//**
Resets table->record[0]. */
UNIV_INTERN
void
@@ -199,66 +1270,29 @@ innobase_rec_reset(
}
}
-/******************************************************************//**
-Removes the filename encoding of a database and table name. */
-static
-void
-innobase_convert_tablename(
-/*=======================*/
- char* s) /*!< in: identifier; out: decoded identifier */
-{
- uint errors;
-
- char* slash = strchr(s, '/');
-
- if (slash) {
- char* t;
- /* Temporarily replace the '/' with NUL. */
- *slash = 0;
- /* Convert the database name. */
- strconvert(&my_charset_filename, s, system_charset_info,
- s, slash - s + 1, &errors);
-
- t = s + strlen(s);
- ut_ad(slash >= t);
- /* Append a '.' after the database name. */
- *t++ = '.';
- slash++;
- /* Convert the table name. */
- strconvert(&my_charset_filename, slash, system_charset_info,
- t, slash - t + strlen(slash), &errors);
- } else {
- strconvert(&my_charset_filename, s,
- system_charset_info, s, strlen(s), &errors);
- }
-}
-
/*******************************************************************//**
This function checks that index keys are sensible.
@return 0 or error number */
-static
+static __attribute__((nonnull, warn_unused_result))
int
innobase_check_index_keys(
/*======================*/
- const KEY* key_info, /*!< in: Indexes to be
- created */
- ulint num_of_keys, /*!< in: Number of
- indexes to be created */
- const dict_table_t* table) /*!< in: Existing indexes */
+ const Alter_inplace_info* info,
+ /*!< in: indexes to be created or dropped */
+ const dict_table_t* innodb_table)
+ /*!< in: Existing indexes */
{
- ulint key_num;
-
- ut_ad(key_info);
- ut_ad(num_of_keys);
-
- for (key_num = 0; key_num < num_of_keys; key_num++) {
- const KEY& key = key_info[key_num];
+ for (uint key_num = 0; key_num < info->index_add_count;
+ key_num++) {
+ const KEY& key = info->key_info_buffer[
+ info->index_add_buffer[key_num]];
/* Check that the same index name does not appear
twice in indexes to be created. */
for (ulint i = 0; i < key_num; i++) {
- const KEY& key2 = key_info[i];
+ const KEY& key2 = info->key_info_buffer[
+ info->index_add_buffer[i]];
if (0 == strcmp(key.name, key2.name)) {
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
@@ -270,23 +1304,36 @@ innobase_check_index_keys(
/* Check that the same index name does not already exist. */
- for (const dict_index_t* index
- = dict_table_get_first_index(table);
- index; index = dict_table_get_next_index(index)) {
+ const dict_index_t* index;
- if (0 == strcmp(key.name, index->name)) {
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- key.name);
+ for (index = dict_table_get_first_index(innodb_table);
+ index; index = dict_table_get_next_index(index)) {
- return(ER_WRONG_NAME_FOR_INDEX);
+ if (!strcmp(key.name, index->name)) {
+ break;
}
}
- /* Check that MySQL does not try to create a column
- prefix index field on an inappropriate data type and
- that the same column does not appear twice in the index. */
+ if (index) {
+ /* If a key by the same name is being created and
+ dropped, the name clash is OK. */
+ for (uint i = 0; i < info->index_drop_count;
+ i++) {
+ const KEY* drop_key
+ = info->index_drop_buffer[i];
- for (ulint i = 0; i < key.key_parts; i++) {
+ if (0 == strcmp(key.name, drop_key->name)) {
+ goto name_ok;
+ }
+ }
+
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name);
+
+ return(ER_WRONG_NAME_FOR_INDEX);
+ }
+
+name_ok:
+ for (ulint i = 0; i < key.user_defined_key_parts; i++) {
const KEY_PART_INFO& key_part1
= key.key_part[i];
const Field* field
@@ -301,6 +1348,10 @@ innobase_check_index_keys(
case DATA_FLOAT:
case DATA_DOUBLE:
case DATA_DECIMAL:
+ /* Check that MySQL does not try to
+ create a column prefix index field on
+ an inappropriate data type. */
+
if (field->type() == MYSQL_TYPE_VARCHAR) {
if (key_part1.length
>= field->pack_length()
@@ -320,17 +1371,19 @@ innobase_check_index_keys(
return(ER_WRONG_KEY_COLUMN);
}
+ /* Check that the same column does not appear
+ twice in the index. */
+
for (ulint j = 0; j < i; j++) {
const KEY_PART_INFO& key_part2
= key.key_part[j];
- if (strcmp(key_part1.field->field_name,
- key_part2.field->field_name)) {
+ if (key_part1.fieldnr != key_part2.fieldnr) {
continue;
}
my_error(ER_WRONG_KEY_COLUMN, MYF(0),
- key_part1.field->field_name);
+ field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
}
@@ -341,16 +1394,19 @@ innobase_check_index_keys(
/*******************************************************************//**
Create index field definition for key part */
-static
+static __attribute__((nonnull(2,3)))
void
innobase_create_index_field_def(
/*============================*/
- KEY_PART_INFO* key_part, /*!< in: MySQL key definition */
- mem_heap_t* heap, /*!< in: memory heap */
- merge_index_field_t* index_field) /*!< out: index field
+ const TABLE* altered_table, /*!< in: MySQL table that is
+ being altered, or NULL
+ if a new clustered index is
+ not being created */
+ const KEY_PART_INFO* key_part, /*!< in: MySQL key definition */
+ index_field_t* index_field) /*!< out: index field
definition for key_part */
{
- Field* field;
+ const Field* field;
ibool is_unsigned;
ulint col_type;
@@ -359,9 +1415,13 @@ innobase_create_index_field_def(
ut_ad(key_part);
ut_ad(index_field);
- field = key_part->field;
+ field = altered_table
+ ? altered_table->field[key_part->fieldnr]
+ : key_part->field;
ut_a(field);
+ index_field->col_no = key_part->fieldnr;
+
col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
if (DATA_BLOB == col_type
@@ -376,44 +1436,48 @@ innobase_create_index_field_def(
index_field->prefix_len = 0;
}
- index_field->field_name = mem_heap_strdup(heap, field->field_name);
-
DBUG_VOID_RETURN;
}
/*******************************************************************//**
Create index definition for key */
-static
+static __attribute__((nonnull))
void
innobase_create_index_def(
/*======================*/
- KEY* key, /*!< in: key definition */
- bool new_primary, /*!< in: TRUE=generating
- a new primary key
+ const TABLE* altered_table, /*!< in: MySQL table that is
+ being altered */
+ const KEY* keys, /*!< in: key definitions */
+ ulint key_number, /*!< in: MySQL key number */
+ bool new_clustered, /*!< in: true if generating
+ a new clustered index
on the table */
- bool key_primary, /*!< in: TRUE if this key
- is a primary key */
- merge_index_def_t* index, /*!< out: index definition */
+ bool key_clustered, /*!< in: true if this is
+ the new clustered index */
+ index_def_t* index, /*!< out: index definition */
mem_heap_t* heap) /*!< in: heap where memory
is allocated */
{
- ulint i;
- ulint len;
- ulint n_fields = key->key_parts;
- char* index_name;
+ const KEY* key = &keys[key_number];
+ ulint i;
+ ulint len;
+ ulint n_fields = key->user_defined_key_parts;
+ char* index_name;
DBUG_ENTER("innobase_create_index_def");
+ DBUG_ASSERT(!key_clustered || new_clustered);
- index->fields = (merge_index_field_t*) mem_heap_alloc(
- heap, n_fields * sizeof *index->fields);
+ index->fields = static_cast<index_field_t*>(
+ mem_heap_alloc(heap, n_fields * sizeof *index->fields));
index->ind_type = 0;
+ index->key_number = key_number;
index->n_fields = n_fields;
len = strlen(key->name) + 1;
- index->name = index_name = (char*) mem_heap_alloc(heap,
- len + !new_primary);
+ index->name = index_name = static_cast<char*>(
+ mem_heap_alloc(heap, len + !new_clustered));
- if (UNIV_LIKELY(!new_primary)) {
+ if (!new_clustered) {
*index_name++ = TEMP_INDEX_PREFIX;
}
@@ -423,144 +1487,155 @@ innobase_create_index_def(
index->ind_type |= DICT_UNIQUE;
}
- if (key->flags & HA_FULLTEXT) {
+ if (key_clustered) {
+ DBUG_ASSERT(!(key->flags & HA_FULLTEXT));
+ index->ind_type |= DICT_CLUSTERED;
+ } else if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_BINARY_PACK_KEY)));
+ DBUG_ASSERT(!(key->flags & HA_NOSAME));
+ DBUG_ASSERT(!index->ind_type);
index->ind_type |= DICT_FTS;
}
- if (key_primary) {
- index->ind_type |= DICT_CLUSTERED;
+ if (!new_clustered) {
+ altered_table = NULL;
}
for (i = 0; i < n_fields; i++) {
- innobase_create_index_field_def(&key->key_part[i], heap,
- &index->fields[i]);
+ innobase_create_index_field_def(
+ altered_table, &key->key_part[i], &index->fields[i]);
}
DBUG_VOID_RETURN;
}
/*******************************************************************//**
-Copy index field definition */
+Check whether the table has the FTS_DOC_ID column
+@return whether there exists an FTS_DOC_ID column */
static
-void
-innobase_copy_index_field_def(
+bool
+innobase_fts_check_doc_id_col(
/*==========================*/
- const dict_field_t* field, /*!< in: definition to copy */
- merge_index_field_t* index_field) /*!< out: copied definition */
+ const dict_table_t* table, /*!< in: InnoDB table with
+ fulltext index */
+ const TABLE* altered_table,
+ /*!< in: MySQL table with
+ fulltext index */
+ ulint* fts_doc_col_no)
+ /*!< out: The column number for
+ Doc ID, or ULINT_UNDEFINED
+ if it is of wrong type */
{
- DBUG_ENTER("innobase_copy_index_field_def");
- DBUG_ASSERT(field != NULL);
- DBUG_ASSERT(index_field != NULL);
-
- index_field->field_name = field->name;
- index_field->prefix_len = field->prefix_len;
-
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Copy index definition for the index */
-static
-void
-innobase_copy_index_def(
-/*====================*/
- const dict_index_t* index, /*!< in: index definition to copy */
- merge_index_def_t* new_index,/*!< out: Index definition */
- mem_heap_t* heap) /*!< in: heap where allocated */
-{
- ulint n_fields;
- ulint i;
-
- DBUG_ENTER("innobase_copy_index_def");
+ *fts_doc_col_no = ULINT_UNDEFINED;
- /* Note that we take only those fields that user defined to be
- in the index. In the internal representation more colums were
- added and those colums are not copied .*/
+ const uint n_cols = altered_table->s->fields;
+ uint i;
- n_fields = index->n_user_defined_cols;
+ for (i = 0; i < n_cols; i++) {
+ const Field* field = altered_table->s->field[i];
- new_index->fields = (merge_index_field_t*) mem_heap_alloc(
- heap, n_fields * sizeof *new_index->fields);
+ if (my_strcasecmp(system_charset_info,
+ field->field_name, FTS_DOC_ID_COL_NAME)) {
+ continue;
+ }
- /* When adding a PRIMARY KEY, we may convert a previous
- clustered index to a secondary index (UNIQUE NOT NULL). */
- new_index->ind_type = index->type & ~DICT_CLUSTERED;
- new_index->n_fields = n_fields;
- new_index->name = index->name;
+ if (strcmp(field->field_name, FTS_DOC_ID_COL_NAME)) {
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ field->field_name);
+ } else if (field->type() != MYSQL_TYPE_LONGLONG
+ || field->pack_length() != 8
+ || field->real_maybe_null()
+ || !(field->flags & UNSIGNED_FLAG)) {
+ my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
+ field->field_name);
+ } else {
+ *fts_doc_col_no = i;
+ }
- for (i = 0; i < n_fields; i++) {
- innobase_copy_index_field_def(&index->fields[i],
- &new_index->fields[i]);
+ return(true);
}
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Check whether the table has the FTS_DOC_ID column
-@return TRUE if there exists the FTS_DOC_ID column, if TRUE but fts_doc_col_no
- equal to ULINT_UNDEFINED then that means the column exists but is not
- of the right type. */
-static
-ibool
-innobase_fts_check_doc_id_col(
-/*==========================*/
- dict_table_t* table, /*!< in: table with FTS index */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID */
-{
- *fts_doc_col_no = ULINT_UNDEFINED;
+ if (!table) {
+ return(false);
+ }
- for (ulint i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
+ for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
const char* name = dict_table_get_col_name(table, i);
if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
+#ifdef UNIV_DEBUG
const dict_col_t* col;
col = dict_table_get_nth_col(table, i);
- if (col->mtype != DATA_INT || col->len != 8) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be of the BIGINT datatype\n",
- FTS_DOC_ID_COL_NAME, table->name);
- } else if (!(col->prtype & DATA_NOT_NULL)) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be NOT NULL\n",
- FTS_DOC_ID_COL_NAME, table->name);
-
- } else if (!(col->prtype & DATA_UNSIGNED)) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be UNSIGNED\n",
- FTS_DOC_ID_COL_NAME, table->name);
- } else {
- *fts_doc_col_no = i;
- }
-
- return(TRUE);
+ /* Because the FTS_DOC_ID does not exist in
+ the MySQL data dictionary, this must be the
+ internally created FTS_DOC_ID column. */
+ ut_ad(col->mtype == DATA_INT);
+ ut_ad(col->len == 8);
+ ut_ad(col->prtype & DATA_NOT_NULL);
+ ut_ad(col->prtype & DATA_UNSIGNED);
+#endif /* UNIV_DEBUG */
+ *fts_doc_col_no = i;
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
+@return the status of the FTS_DOC_ID index */
UNIV_INTERN
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
/*============================*/
- dict_table_t* table, /*!< in: table definition */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID */
+ const dict_table_t* table, /*!< in: table definition */
+ const TABLE* altered_table, /*!< in: MySQL table
+ that is being altered */
+ ulint* fts_doc_col_no) /*!< out: The column number for
+ Doc ID, or ULINT_UNDEFINED
+ if it is being created in
+ ha_alter_info */
{
- dict_index_t* index;
- dict_field_t* field;
+ const dict_index_t* index;
+ const dict_field_t* field;
+
+ if (altered_table) {
+ /* Check if a unique index with the name of
+ FTS_DOC_ID_INDEX_NAME is being created. */
+
+ for (uint i = 0; i < altered_table->s->keys; i++) {
+ const KEY& key = altered_table->s->key_info[i];
+
+ if (innobase_strcasecmp(
+ key.name, FTS_DOC_ID_INDEX_NAME)) {
+ continue;
+ }
+
+ if ((key.flags & HA_NOSAME)
+ && key.user_defined_key_parts == 1
+ && !strcmp(key.name, FTS_DOC_ID_INDEX_NAME)
+ && !strcmp(key.key_part[0].field->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ if (fts_doc_col_no) {
+ *fts_doc_col_no = ULINT_UNDEFINED;
+ }
+ return(FTS_EXIST_DOC_ID_INDEX);
+ } else {
+ return(FTS_INCORRECT_DOC_ID_INDEX);
+ }
+ }
+ }
+
+ if (!table) {
+ return(FTS_NOT_EXIST_DOC_ID_INDEX);
+ }
for (index = dict_table_get_first_index(table);
index; index = dict_table_get_next_index(index)) {
@@ -572,6 +1647,7 @@ innobase_fts_check_doc_id_index(
}
if (!dict_index_is_unique(index)
+ || dict_index_get_n_unique(index) > 1
|| strcmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
return(FTS_INCORRECT_DOC_ID_INDEX);
}
@@ -592,9 +1668,9 @@ innobase_fts_check_doc_id_index(
} else {
return(FTS_INCORRECT_DOC_ID_INDEX);
}
-
}
+
/* Not found */
return(FTS_NOT_EXIST_DOC_ID_INDEX);
}
@@ -608,12 +1684,12 @@ enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
/*===================================*/
ulint n_key, /*!< in: Number of keys */
- KEY * key_info) /*!< in: Key definition */
+ const KEY* key_info) /*!< in: Key definition */
{
/* Check whether there is a "FTS_DOC_ID_INDEX" in the to be built index
list */
for (ulint j = 0; j < n_key; j++) {
- KEY* key = &key_info[j];
+ const KEY* key = &key_info[j];
if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
continue;
@@ -622,14 +1698,15 @@ innobase_fts_check_doc_id_index_in_def(
/* Do a check on FTS DOC ID_INDEX, it must be unique,
named as "FTS_DOC_ID_INDEX" and on column "FTS_DOC_ID" */
if (!(key->flags & HA_NOSAME)
+ || key->user_defined_key_parts != 1
|| strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
|| strcmp(key->key_part[0].field->field_name,
- FTS_DOC_ID_COL_NAME)) {
+ FTS_DOC_ID_COL_NAME)) {
return(FTS_INCORRECT_DOC_ID_INDEX);
- }
+ }
return(FTS_EXIST_DOC_ID_INDEX);
- }
+ }
return(FTS_NOT_EXIST_DOC_ID_INDEX);
}
@@ -639,8 +1716,7 @@ Create an index table where indexes are ordered as follows:
IF a new primary key is defined for the table THEN
1) New primary key
- 2) Original secondary indexes
- 3) New secondary indexes
+ 2) The remaining keys in key_info
ELSE
@@ -648,626 +1724,1272 @@ ELSE
ENDIF
-
-@return key definitions or NULL */
-static
-merge_index_def_t*
-innobase_create_key_def(
-/*====================*/
- trx_t* trx, /*!< in: trx */
- dict_table_t* table, /*!< in: table definition */
- mem_heap_t* heap, /*!< in: heap where space for key
- definitions are allocated */
- KEY* key_info, /*!< in: Indexes to be created */
- ulint& n_keys, /*!< in/out: Number of indexes to
- be created */
- ulint* num_fts_index, /*!< out: Number of FTS indexes */
- ibool* add_fts_doc_id, /*!< out: Whether we need to add
- new DOC ID column for FTS index */
- ibool* add_fts_doc_id_idx)/*!< out: Whether we need to add
- new index on DOC ID column */
+@return key definitions */
+static __attribute__((nonnull, warn_unused_result, malloc))
+index_def_t*
+innobase_create_key_defs(
+/*=====================*/
+ mem_heap_t* heap,
+ /*!< in/out: memory heap where space for key
+ definitions are allocated */
+ const Alter_inplace_info* ha_alter_info,
+ /*!< in: alter operation */
+ const TABLE* altered_table,
+ /*!< in: MySQL table that is being altered */
+ ulint& n_add,
+ /*!< in/out: number of indexes to be created */
+ ulint& n_fts_add,
+ /*!< out: number of FTS indexes to be created */
+ bool got_default_clust,
+ /*!< in: whether the table lacks a primary key */
+ ulint& fts_doc_id_col,
+ /*!< in: The column number for Doc ID */
+ bool& add_fts_doc_id,
+ /*!< in: whether we need to add new DOC ID
+ column for FTS index */
+ bool& add_fts_doc_idx)
+ /*!< in: whether we need to add new DOC ID
+ index for FTS index */
{
- ulint i = 0;
- merge_index_def_t* indexdef;
- merge_index_def_t* indexdefs;
+ index_def_t* indexdef;
+ index_def_t* indexdefs;
bool new_primary;
+ const uint*const add
+ = ha_alter_info->index_add_buffer;
+ const KEY*const key_info
+ = ha_alter_info->key_info_buffer;
- DBUG_ENTER("innobase_create_key_def");
-
- indexdef = indexdefs = (merge_index_def_t*)
- mem_heap_alloc(heap, sizeof *indexdef
- * (n_keys + UT_LIST_GET_LEN(table->indexes)));
-
- *add_fts_doc_id = FALSE;
- *add_fts_doc_id_idx = FALSE;
+ DBUG_ENTER("innobase_create_key_defs");
+ DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_idx);
+ DBUG_ASSERT(ha_alter_info->index_add_count == n_add);
/* If there is a primary key, it is always the first index
- defined for the table. */
+ defined for the innodb_table. */
- new_primary = !my_strcasecmp(system_charset_info,
- key_info->name, "PRIMARY");
+ new_primary = n_add > 0
+ && !my_strcasecmp(system_charset_info,
+ key_info[*add].name, "PRIMARY");
+ n_fts_add = 0;
/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
columns and if the index does not contain column prefix(es)
(only prefix/part of the column is indexed), MySQL will treat the
index as a PRIMARY KEY unless the table already has one. */
- if (!new_primary && (key_info->flags & HA_NOSAME)
- && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
- && row_table_got_default_clust_index(table)) {
- uint key_part = key_info->key_parts;
+ if (n_add > 0 && !new_primary && got_default_clust
+ && (key_info[*add].flags & HA_NOSAME)
+ && !(key_info[*add].flags & HA_KEY_HAS_PART_KEY_SEG)) {
+ uint key_part = key_info[*add].user_defined_key_parts;
- new_primary = TRUE;
+ new_primary = true;
while (key_part--) {
- if (key_info->key_part[key_part].key_type
- & FIELDFLAG_MAYBE_NULL) {
- new_primary = FALSE;
+ const uint maybe_null
+ = key_info[*add].key_part[key_part].key_type
+ & FIELDFLAG_MAYBE_NULL;
+ DBUG_ASSERT(!maybe_null
+ == !key_info[*add].key_part[key_part].
+ field->real_maybe_null());
+
+ if (maybe_null) {
+ new_primary = false;
break;
}
}
}
- /* Check whether any indexes in the create list are Full
- Text Indexes*/
- for (ulint j = 0; j < n_keys; j++) {
- if (key_info[j].flags & HA_FULLTEXT) {
- (*num_fts_index)++;
- }
- }
-
- /* Check whether there is a "FTS_DOC_ID_INDEX" in the to be built index
- list */
- if (innobase_fts_check_doc_id_index_in_def(n_keys, key_info)
- == FTS_INCORRECT_DOC_ID_INDEX) {
- push_warning_printf((THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- " InnoDB: Index name %s is reserved"
- " for the unique index on"
- " FTS_DOC_ID column for FTS"
- " document ID indexing"
- " on table %s. Please check"
- " the index definition to"
- " make sure it is of correct"
- " type\n",
- FTS_DOC_ID_INDEX_NAME,
- table->name);
- DBUG_RETURN(NULL);
- }
-
- /* If we are to build an FTS index, check whether the table
- already has a DOC ID column, if not, we will need to add a
- Doc ID hidden column and rebuild the primary index */
- if (*num_fts_index) {
- enum fts_doc_id_index_enum ret;
- ibool exists;
- ulint doc_col_no;
- ulint fts_doc_col_no;
-
- exists = innobase_fts_check_doc_id_col(table, &fts_doc_col_no);
-
- if (exists) {
-
- if (fts_doc_col_no == ULINT_UNDEFINED) {
-
- push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_COLUMN_NAME,
- " InnoDB: There exists a column %s "
- "in table %s, but it is the wrong "
- "type. Create of FTS index failed.\n",
- FTS_DOC_ID_COL_NAME, table->name);
+ const bool rebuild = new_primary || add_fts_doc_id
+ || innobase_need_rebuild(ha_alter_info);
+ /* Reserve one more space if new_primary is true, and we might
+ need to add the FTS_DOC_ID_INDEX */
+ indexdef = indexdefs = static_cast<index_def_t*>(
+ mem_heap_alloc(
+ heap, sizeof *indexdef
+ * (ha_alter_info->key_count
+ + rebuild
+ + got_default_clust)));
- DBUG_RETURN(NULL);
-
- } else if (!table->fts) {
- table->fts = fts_create(table);
- }
-
- table->fts->doc_col = fts_doc_col_no;
+ if (rebuild) {
+ ulint primary_key_number;
+ if (new_primary) {
+ DBUG_ASSERT(n_add > 0);
+ primary_key_number = *add;
+ } else if (got_default_clust) {
+ /* Create the GEN_CLUST_INDEX */
+ index_def_t* index = indexdef++;
+
+ index->fields = NULL;
+ index->n_fields = 0;
+ index->ind_type = DICT_CLUSTERED;
+ index->name = mem_heap_strdup(
+ heap, innobase_index_reserve_name);
+ index->key_number = ~0;
+ primary_key_number = ULINT_UNDEFINED;
+ goto created_clustered;
} else {
- *add_fts_doc_id = TRUE;
- *add_fts_doc_id_idx = TRUE;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Rebuild table %s to add "
- "DOC_ID column\n", table->name);
+ primary_key_number = 0;
}
- ret = innobase_fts_check_doc_id_index(table, &doc_col_no);
+ /* Create the PRIMARY key index definition */
+ innobase_create_index_def(
+ altered_table, key_info, primary_key_number,
+ TRUE, TRUE, indexdef++, heap);
- switch (ret) {
- case FTS_NOT_EXIST_DOC_ID_INDEX:
- *add_fts_doc_id_idx = TRUE;
- break;
- case FTS_INCORRECT_DOC_ID_INDEX:
+created_clustered:
+ n_add = 1;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Index %s is used for FTS"
- " Doc ID indexing on table %s, it is"
- " now on the wrong column or of"
- " wrong format. Please drop it.\n",
- FTS_DOC_ID_INDEX_NAME, table->name);
- DBUG_RETURN(NULL);
+ for (ulint i = 0; i < ha_alter_info->key_count; i++) {
+ if (i == primary_key_number) {
+ continue;
+ }
+ /* Copy the index definitions. */
+ innobase_create_index_def(
+ altered_table, key_info, i, TRUE, FALSE,
+ indexdef, heap);
- default:
- ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
+ if (indexdef->ind_type & DICT_FTS) {
+ n_fts_add++;
+ }
- ut_ad(doc_col_no == fts_doc_col_no);
+ indexdef++;
+ n_add++;
}
- }
- /* If DICT_TF2_FTS_ADD_DOC_ID is set, we will need to rebuild
- the table to add the unique Doc ID column for FTS index. And
- thus the primary index would required to be rebuilt. Copy all
- the index definitions */
- if (new_primary || *add_fts_doc_id) {
- const dict_index_t* index;
-
- if (new_primary) {
- /* Create the PRIMARY key index definition */
- innobase_create_index_def(&key_info[i++],
- TRUE, TRUE,
- indexdef++, heap);
- }
+ if (n_fts_add > 0) {
+ if (!add_fts_doc_id
+ && !innobase_fts_check_doc_id_col(
+ NULL, altered_table,
+ &fts_doc_id_col)) {
+ fts_doc_id_col = altered_table->s->fields;
+ add_fts_doc_id = true;
+ }
- row_mysql_lock_data_dictionary(trx);
+ if (!add_fts_doc_idx) {
+ fts_doc_id_index_enum ret;
+ ulint doc_col_no;
- index = dict_table_get_first_index(table);
+ ret = innobase_fts_check_doc_id_index(
+ NULL, altered_table, &doc_col_no);
- /* Copy the index definitions of the old table. Skip
- the old clustered index if it is a generated clustered
- index or a PRIMARY KEY. If the clustered index is a
- UNIQUE INDEX, it must be converted to a secondary index. */
+ /* This should have been checked before */
+ ut_ad(ret != FTS_INCORRECT_DOC_ID_INDEX);
- if (new_primary
- && (dict_index_get_nth_col(index, 0)->mtype
- == DATA_SYS
- || !my_strcasecmp(system_charset_info,
- index->name, "PRIMARY"))) {
- index = dict_table_get_next_index(index);
+ if (ret == FTS_NOT_EXIST_DOC_ID_INDEX) {
+ add_fts_doc_idx = true;
+ } else {
+ ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
+ ut_ad(doc_col_no == ULINT_UNDEFINED
+ || doc_col_no == fts_doc_id_col);
+ }
+ }
}
+ } else {
+ /* Create definitions for added secondary indexes. */
- while (index) {
- innobase_copy_index_def(index, indexdef++, heap);
+ for (ulint i = 0; i < n_add; i++) {
+ innobase_create_index_def(
+ altered_table, key_info, add[i], FALSE, FALSE,
+ indexdef, heap);
- if (new_primary && index->type & DICT_FTS) {
- (*num_fts_index)++;
+ if (indexdef->ind_type & DICT_FTS) {
+ n_fts_add++;
}
- index = dict_table_get_next_index(index);
+ indexdef++;
}
+ }
- /* The primary index would be rebuilt if a FTS Doc ID
- column is to be added, and the primary index definition
- is just copied from old table and stored in indexdefs[0] */
- if (*add_fts_doc_id) {
- indexdefs[0].ind_type |= DICT_CLUSTERED;
- DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_ADD_DOC_ID);
- }
+ DBUG_ASSERT(indexdefs + n_add == indexdef);
- row_mysql_unlock_data_dictionary(trx);
- }
+ if (add_fts_doc_idx) {
+ index_def_t* index = indexdef++;
- /* Create definitions for added secondary indexes. */
+ index->fields = static_cast<index_field_t*>(
+ mem_heap_alloc(heap, sizeof *index->fields));
+ index->n_fields = 1;
+ index->fields->col_no = fts_doc_id_col;
+ index->fields->prefix_len = 0;
+ index->ind_type = DICT_UNIQUE;
- while (i < n_keys) {
- innobase_create_index_def(&key_info[i++], new_primary, FALSE,
- indexdef++, heap);
- }
+ if (rebuild) {
+ index->name = mem_heap_strdup(
+ heap, FTS_DOC_ID_INDEX_NAME);
+ ut_ad(!add_fts_doc_id
+ || fts_doc_id_col == altered_table->s->fields);
+ } else {
+ char* index_name;
+ index->name = index_name = static_cast<char*>(
+ mem_heap_alloc(
+ heap,
+ 1 + sizeof FTS_DOC_ID_INDEX_NAME));
+ *index_name++ = TEMP_INDEX_PREFIX;
+ memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
+ sizeof FTS_DOC_ID_INDEX_NAME);
+ }
- n_keys = indexdef - indexdefs;
+ /* TODO: assign a real MySQL key number for this */
+ index->key_number = ULINT_UNDEFINED;
+ n_add++;
+ }
+ DBUG_ASSERT(indexdef > indexdefs);
+ DBUG_ASSERT((ulint) (indexdef - indexdefs)
+ <= ha_alter_info->key_count
+ + add_fts_doc_idx + got_default_clust);
+ DBUG_ASSERT(ha_alter_info->index_add_count <= n_add);
DBUG_RETURN(indexdefs);
}
/*******************************************************************//**
Check each index column size, make sure they do not exceed the max limit
-@return HA_ERR_INDEX_COL_TOO_LONG if index column size exceeds limit */
-static
-int
+@return true if index column size exceeds limit */
+static __attribute__((nonnull, warn_unused_result))
+bool
innobase_check_column_length(
/*=========================*/
- const dict_table_t*table, /*!< in: table definition */
+ ulint max_col_len, /*!< in: maximum column length */
const KEY* key_info) /*!< in: Indexes to be created */
{
- ulint max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
-
- for (ulint key_part = 0; key_part < key_info->key_parts; key_part++) {
+ for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) {
if (key_info->key_part[key_part].length > max_col_len) {
- my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len);
- return(HA_ERR_INDEX_COL_TOO_LONG);
+ return(true);
}
}
- return(0);
+ return(false);
}
-/*******************************************************************//**
-Create a temporary tablename using query id, thread id, and id
-@return temporary tablename */
-static
-char*
-innobase_create_temporary_tablename(
-/*================================*/
- mem_heap_t* heap, /*!< in: memory heap */
- char id, /*!< in: identifier [0-9a-zA-Z] */
- const char* table_name) /*!< in: table name */
+struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
{
- char* name;
- ulint len;
- static const char suffix[] = "@0023 "; /* "# " */
+ /** Dummy query graph */
+ que_thr_t* thr;
+ /** InnoDB indexes being created */
+ dict_index_t** add;
+ /** MySQL key numbers for the InnoDB indexes that are being created */
+ const ulint* add_key_numbers;
+ /** number of InnoDB indexes being created */
+ const ulint num_to_add;
+ /** InnoDB indexes being dropped */
+ dict_index_t** drop;
+ /** number of InnoDB indexes being dropped */
+ const ulint num_to_drop;
+ /** InnoDB foreign key constraints being dropped */
+ dict_foreign_t** drop_fk;
+ /** number of InnoDB foreign key constraints being dropped */
+ const ulint num_to_drop_fk;
+ /** InnoDB foreign key constraints being added */
+ dict_foreign_t** add_fk;
+	/** number of InnoDB foreign key constraints being added */
+ const ulint num_to_add_fk;
+ /** whether to create the indexes online */
+ bool online;
+ /** memory heap */
+ mem_heap_t* heap;
+ /** dictionary transaction */
+ trx_t* trx;
+ /** table where the indexes are being created or dropped */
+ dict_table_t* indexed_table;
+ /** mapping of old column numbers to new ones, or NULL */
+ const ulint* col_map;
+ /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
+ const ulint add_autoinc;
+ /** default values of ADD COLUMN, or NULL */
+ const dtuple_t* add_cols;
+ /** autoinc sequence to use */
+ ib_sequence_t sequence;
+
+ ha_innobase_inplace_ctx(trx_t* user_trx,
+ dict_index_t** add_arg,
+ const ulint* add_key_numbers_arg,
+ ulint num_to_add_arg,
+ dict_index_t** drop_arg,
+ ulint num_to_drop_arg,
+ dict_foreign_t** drop_fk_arg,
+ ulint num_to_drop_fk_arg,
+ dict_foreign_t** add_fk_arg,
+ ulint num_to_add_fk_arg,
+ bool online_arg,
+ mem_heap_t* heap_arg,
+ trx_t* trx_arg,
+ dict_table_t* indexed_table_arg,
+ const ulint* col_map_arg,
+ ulint add_autoinc_arg,
+ ulonglong autoinc_col_min_value_arg,
+ ulonglong autoinc_col_max_value_arg,
+ const dtuple_t* add_cols_arg) :
+ inplace_alter_handler_ctx(),
+ add (add_arg), add_key_numbers (add_key_numbers_arg),
+ num_to_add (num_to_add_arg),
+ drop (drop_arg), num_to_drop (num_to_drop_arg),
+ drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
+ add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
+ online (online_arg), heap (heap_arg), trx (trx_arg),
+ indexed_table (indexed_table_arg),
+ col_map (col_map_arg), add_autoinc (add_autoinc_arg),
+ add_cols (add_cols_arg),
+ sequence(user_trx ? user_trx->mysql_thd : 0,
+ autoinc_col_min_value_arg, autoinc_col_max_value_arg)
+ {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < num_to_add; i++) {
+ ut_ad(!add[i]->to_be_dropped);
+ }
+ for (ulint i = 0; i < num_to_drop; i++) {
+ ut_ad(drop[i]->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
- len = strlen(table_name);
+ thr = pars_complete_graph_for_exec(NULL, user_trx, heap);
+ }
- name = (char*) mem_heap_alloc(heap, len + sizeof suffix);
- memcpy(name, table_name, len);
- memcpy(name + len, suffix, sizeof suffix);
- name[len + (sizeof suffix - 2)] = id;
+ ~ha_innobase_inplace_ctx()
+ {
+ mem_heap_free(heap);
+ }
- return(name);
-}
+private:
+ // Disable copying
+ ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
+ ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
+};
-class ha_innobase_add_index : public handler_add_index
+/********************************************************************//**
+Drop any indexes that we were not able to free previously due to
+open table handles. */
+static
+void
+online_retry_drop_indexes_low(
+/*==========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx) /*!< in/out: transaction */
{
-public:
- /** table where the indexes are being created */
- dict_table_t* indexed_table;
- ha_innobase_add_index(TABLE* table, KEY* key_info, uint num_of_keys,
- dict_table_t* indexed_table_arg) :
- handler_add_index(table, key_info, num_of_keys),
- indexed_table (indexed_table_arg) {}
- ~ha_innobase_add_index() {}
-};
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+
+ /* We can have table->n_ref_count > 1, because other threads
+ may have prebuilt->table pointing to the table. However, these
+ other threads should be between statements, waiting for the
+ next statement to execute, or for a meta-data lock. */
+ ut_ad(table->n_ref_count >= 1);
+
+ if (table->drop_aborted) {
+ row_merge_drop_indexes(trx, table, TRUE);
+ }
+}
-/*******************************************************************//**
-This is to create FTS_DOC_ID_INDEX definition on the newly added Doc ID for
-the FTS indexes table
-@return dict_index_t for the FTS_DOC_ID_INDEX */
-dict_index_t*
-innobase_create_fts_doc_id_idx(
-/*===========================*/
- dict_table_t* indexed_table, /*!< in: Table where indexes are
- created */
- trx_t* trx, /*!< in: Transaction */
- mem_heap_t* heap) /*!< Heap for index definitions */
+/********************************************************************//**
+Drop any indexes that we were not able to free previously due to
+open table handles. */
+static __attribute__((nonnull))
+void
+online_retry_drop_indexes(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ THD* user_thd) /*!< in/out: MySQL connection */
{
- dict_index_t* index;
- merge_index_def_t fts_index_def;
- char* index_name;
-
- /* Create the temp index name for FTS_DOC_ID_INDEX */
- fts_index_def.name = index_name = (char*) mem_heap_alloc(
- heap, FTS_DOC_ID_INDEX_NAME_LEN + 2);
- *index_name++ = TEMP_INDEX_PREFIX;
- memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
- FTS_DOC_ID_INDEX_NAME_LEN);
- index_name[FTS_DOC_ID_INDEX_NAME_LEN] = 0;
-
- /* Only the Doc ID will be indexed */
- fts_index_def.n_fields = 1;
- fts_index_def.ind_type = DICT_UNIQUE;
- fts_index_def.fields = (merge_index_field_t*) mem_heap_alloc(
- heap, sizeof *fts_index_def.fields);
- fts_index_def.fields[0].prefix_len = 0;
- fts_index_def.fields[0].field_name = mem_heap_strdup(
- heap, FTS_DOC_ID_COL_NAME);
-
- index = row_merge_create_index(trx, indexed_table, &fts_index_def);
- return(index);
+ if (table->drop_aborted) {
+ trx_t* trx = innobase_trx_allocate(user_thd);
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ row_mysql_lock_data_dictionary(trx);
+ online_retry_drop_indexes_low(table, trx);
+ trx_commit_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_mysql(trx);
+ }
+
+#ifdef UNIV_DEBUG
+ mutex_enter(&dict_sys->mutex);
+ dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE);
+ mutex_exit(&dict_sys->mutex);
+ ut_a(!table->drop_aborted);
+#endif /* UNIV_DEBUG */
}
-/*******************************************************************//**
-Clean up on ha_innobase::add_index error. */
-static
+/********************************************************************//**
+Commit a dictionary transaction and drop any indexes that we were not
+able to free previously due to open table handles. */
+static __attribute__((nonnull))
void
-innobase_add_index_cleanup(
-/*=======================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table) /*!< in/out: table on which
- the indexes were going to be
- created */
+online_retry_drop_indexes_with_trx(
+/*===============================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx) /*!< in/out: transaction */
{
- trx_rollback_to_savepoint(trx, NULL);
+ ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_a(trx != prebuilt->trx);
+ /* Now that the dictionary is being locked, check if we can
+ drop any incompletely created indexes that may have been left
+ behind in rollback_inplace_alter_table() earlier. */
+ if (table->drop_aborted) {
- trx_free_for_mysql(trx);
+ trx->table_id = 0;
- trx_commit_for_mysql(prebuilt->trx);
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- if (table != NULL) {
+ online_retry_drop_indexes_low(table, trx);
+ trx_commit_for_mysql(trx);
+ }
+}
- rw_lock_x_lock(&dict_operation_lock);
+/** Determines if InnoDB is dropping a foreign key constraint.
+@param foreign the constraint
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@return whether the constraint is being dropped */
+inline __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_dropping_foreign(
+/*======================*/
+ const dict_foreign_t* foreign,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk)
+{
+ while (n_drop_fk--) {
+ if (*drop_fk++ == foreign) {
+ return(true);
+ }
+ }
- dict_mutex_enter_for_mysql();
+ return(false);
+}
- /* Note: This check excludes the system tables. However, we
- should be safe because users cannot add indexes to system
- tables. */
+/** Determines if an InnoDB FOREIGN KEY constraint depends on a
+column that is being dropped or modified to NOT NULL.
+@param user_table InnoDB table as it is before the ALTER operation
+@param col_name Name of the column being altered
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@param drop true=drop column, false=set NOT NULL
+@retval true Not allowed (will call my_error())
+@retval false Allowed
+*/
+static __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_check_foreigns_low(
+/*========================*/
+ const dict_table_t* user_table,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk,
+ const char* col_name,
+ bool drop)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* Check if any FOREIGN KEY constraints are defined on this
+ column. */
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->foreign_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ if (!drop && !(foreign->type
+ & (DICT_FOREIGN_ON_DELETE_SET_NULL
+ | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
+ continue;
+ }
- if (UT_LIST_GET_LEN(table->foreign_list) == 0
- && UT_LIST_GET_LEN(table->referenced_list) == 0
- && !table->can_be_evicted) {
+ if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
+ continue;
+ }
- dict_table_move_from_non_lru_to_lru(table);
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ if (!strcmp(foreign->foreign_col_names[f],
+ col_name)) {
+ my_error(drop
+ ? ER_FK_COLUMN_CANNOT_DROP
+ : ER_FK_COLUMN_NOT_NULL, MYF(0),
+ col_name, foreign->id);
+ return(true);
+ }
}
+ }
+
+ if (!drop) {
+ /* SET NULL clauses on foreign key constraints of
+ child tables affect the child tables, not the parent table.
+ The column can be NOT NULL in the parent table. */
+ return(false);
+ }
- dict_table_close(table, TRUE);
+ /* Check if any FOREIGN KEY constraints in other tables are
+ referring to the column that is being dropped. */
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->referenced_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
+ continue;
+ }
- dict_mutex_exit_for_mysql();
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ char display_name[FN_REFLEN];
- rw_lock_x_unlock(&dict_operation_lock);
+ if (strcmp(foreign->referenced_col_names[f],
+ col_name)) {
+ continue;
+ }
+
+ char* buf_end = innobase_convert_name(
+ display_name, (sizeof display_name) - 1,
+ foreign->foreign_table_name,
+ strlen(foreign->foreign_table_name),
+ NULL, TRUE);
+ *buf_end = '\0';
+ my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD,
+ MYF(0), col_name, foreign->id,
+ display_name);
+
+ return(true);
+ }
}
+
+ return(false);
}
-/*******************************************************************//**
-Create indexes.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::add_index(
-/*===================*/
- TABLE* in_table, /*!< in: Table where indexes
- are created */
- KEY* key_info, /*!< in: Indexes
- to be created */
- uint num_of_keys, /*!< in: Number of indexes
- to be created */
- handler_add_index** add) /*!< out: context */
+/** Determines if an InnoDB FOREIGN KEY constraint depends on a
+column that is being dropped or modified to NOT NULL.
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param user_table InnoDB table as it is before the ALTER operation
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@retval true Not allowed (will call my_error())
+@retval false Allowed
+*/
+static __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_check_foreigns(
+/*====================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* old_table,
+ const dict_table_t* user_table,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk)
{
- dict_index_t** index = NULL; /*!< Index to be created */
- dict_index_t* fts_index = NULL;/*!< FTS Index to be created */
- dict_table_t* indexed_table; /*!< Table where indexes are created */
- merge_index_def_t* index_defs; /*!< Index definitions */
- mem_heap_t* heap = NULL; /*!< Heap for index definitions */
- trx_t* trx; /*!< Transaction */
- ulint num_of_idx;
- ulint num_created = 0;
- ibool dict_locked = FALSE;
- ulint new_primary = 0;
- int error;
- ulint num_fts_index = 0;
- ulint num_idx_create = 0;
- ibool fts_add_doc_id = FALSE;
- ibool fts_add_doc_idx = FALSE;
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
- DBUG_ENTER("ha_innobase::add_index");
- ut_a(table);
- ut_a(key_info);
- ut_a(num_of_keys);
+ for (Field** fp = old_table->field; *fp; fp++) {
+ cf_it.rewind();
+ const Create_field* new_field;
- *add = NULL;
+ ut_ad(!(*fp)->real_maybe_null()
+ == !!((*fp)->flags & NOT_NULL_FLAG));
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ while ((new_field = cf_it++)) {
+ if (new_field->field == *fp) {
+ break;
+ }
+ }
+
+ if (!new_field || (new_field->flags & NOT_NULL_FLAG)) {
+ if (innobase_check_foreigns_low(
+ user_table, drop_fk, n_drop_fk,
+ (*fp)->field_name, !new_field)) {
+ return(true);
+ }
+ }
}
- update_thd();
+ return(false);
+}
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads. */
- trx_search_latch_release_if_reserved(prebuilt->trx);
+/** Convert a default value for ADD COLUMN.
- /* Check if the index name is reserved. */
- if (innobase_index_name_is_reserved(user_thd, key_info, num_of_keys)) {
- DBUG_RETURN(-1);
+@param heap Memory heap where allocated
+@param dfield InnoDB data field to copy to
+@param field MySQL value for the column
+@param comp nonzero if in compact format */
+static __attribute__((nonnull))
+void
+innobase_build_col_map_add(
+/*=======================*/
+ mem_heap_t* heap,
+ dfield_t* dfield,
+ const Field* field,
+ ulint comp)
+{
+ if (field->is_real_null()) {
+ dfield_set_null(dfield);
+ return;
}
- indexed_table = dict_table_open_on_name(prebuilt->table->name, FALSE);
+ ulint size = field->pack_length();
- if (UNIV_UNLIKELY(!indexed_table)) {
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
+ byte* buf = static_cast<byte*>(mem_heap_alloc(heap, size));
- ut_a(indexed_table == prebuilt->table);
+ row_mysql_store_col_in_innobase_format(
+ dfield, buf, TRUE, field->ptr, size, comp);
+}
- if (indexed_table->tablespace_discarded) {
- DBUG_RETURN(-1);
+/** Construct the translation table for reordering, dropping or
+adding columns.
+
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param table MySQL table as it is before the ALTER operation
+@param new_table InnoDB table corresponding to MySQL altered_table
+@param old_table InnoDB table corresponding to MYSQL table
+@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN
+@param heap Memory heap where allocated
+@return array of integers, mapping column numbers in the table
+to column numbers in altered_table */
+static __attribute__((nonnull(1,2,3,4,5,7), warn_unused_result))
+const ulint*
+innobase_build_col_map(
+/*===================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table,
+ const dict_table_t* new_table,
+ const dict_table_t* old_table,
+ dtuple_t* add_cols,
+ mem_heap_t* heap)
+{
+ DBUG_ENTER("innobase_build_col_map");
+ DBUG_ASSERT(altered_table != table);
+ DBUG_ASSERT(new_table != old_table);
+ DBUG_ASSERT(dict_table_get_n_cols(new_table)
+ >= altered_table->s->fields + DATA_N_SYS_COLS);
+ DBUG_ASSERT(dict_table_get_n_cols(old_table)
+ >= table->s->fields + DATA_N_SYS_COLS);
+ DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN));
+ DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
+ == dict_table_get_n_cols(new_table));
+
+ ulint* col_map = static_cast<ulint*>(
+ mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map));
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ uint i = 0;
+
+ /* Any dropped columns will map to ULINT_UNDEFINED. */
+ for (uint old_i = 0; old_i + DATA_N_SYS_COLS < old_table->n_cols;
+ old_i++) {
+ col_map[old_i] = ULINT_UNDEFINED;
}
- /* Check that index keys are sensible */
- error = innobase_check_index_keys(key_info, num_of_keys, prebuilt->table);
+ while (const Create_field* new_field = cf_it++) {
+ for (uint old_i = 0; table->field[old_i]; old_i++) {
+ const Field* field = table->field[old_i];
+ if (new_field->field == field) {
+ col_map[old_i] = i;
+ goto found_col;
+ }
+ }
- if (UNIV_UNLIKELY(error)) {
- dict_table_close(prebuilt->table, FALSE);
- DBUG_RETURN(error);
+ innobase_build_col_map_add(
+ heap, dtuple_get_nth_field(add_cols, i),
+ altered_table->s->field[i],
+ dict_table_is_comp(new_table));
+found_col:
+ i++;
}
- /* Check each index's column length to make sure they do not
- exceed limit */
- for (ulint i = 0; i < num_of_keys; i++) {
- if (key_info[i].flags & HA_FULLTEXT) {
- continue;
+ DBUG_ASSERT(i == altered_table->s->fields);
+
+ i = table->s->fields;
+
+ /* Add the InnoDB hidden FTS_DOC_ID column, if any. */
+ if (i + DATA_N_SYS_COLS < old_table->n_cols) {
+ /* There should be exactly one extra field,
+ the FTS_DOC_ID. */
+ DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(old_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ DBUG_ASSERT(i + DATA_N_SYS_COLS + 1 == old_table->n_cols);
+ DBUG_ASSERT(!strcmp(dict_table_get_col_name(
+ old_table, table->s->fields),
+ FTS_DOC_ID_COL_NAME));
+ if (altered_table->s->fields + DATA_N_SYS_COLS
+ < new_table->n_cols) {
+ DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(
+ new_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ DBUG_ASSERT(altered_table->s->fields
+ + DATA_N_SYS_COLS + 1
+ == new_table->n_cols);
+ col_map[i] = altered_table->s->fields;
+ } else {
+ DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
+ new_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ col_map[i] = ULINT_UNDEFINED;
}
- error = innobase_check_column_length(prebuilt->table,
- &key_info[i]);
+ i++;
+ } else {
+ DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
+ old_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ }
+
+ for (; i < old_table->n_cols; i++) {
+ col_map[i] = i + new_table->n_cols - old_table->n_cols;
+ }
+
+ DBUG_RETURN(col_map);
+}
+
+/** Drop newly created FTS index related auxiliary tables during
+FIC create index process, before fts_add_index is called
+@param table table that was being rebuilt online
+@param trx transaction
+@return DB_SUCCESS if successful, otherwise last error code
+*/
+static
+dberr_t
+innobase_drop_fts_index_table(
+/*==========================*/
+ dict_table_t* table,
+ trx_t* trx)
+{
+ dberr_t ret_err = DB_SUCCESS;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ if (index->type & DICT_FTS) {
+ dberr_t err;
+
+ err = fts_drop_index_tables(trx, index);
- if (error) {
- dict_table_close(prebuilt->table, FALSE);
- DBUG_RETURN(error);
+ if (err != DB_SUCCESS) {
+ ret_err = err;
+ }
}
}
- heap = mem_heap_create(1024);
- trx_start_if_not_started(prebuilt->trx);
+ return(ret_err);
+}
+
+/** Update internal structures with concurrent writes blocked,
+while preparing ALTER TABLE.
+
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param user_table InnoDB table that is being altered
+@param user_trx User transaction, for locking the table
+@param table_name Table name in MySQL
+@param flags Table and tablespace flags
+@param flags2 Additional table flags
+@param heap Memory heap, or NULL
+@param drop_index Indexes to be dropped, or NULL
+@param n_drop_index Number of indexes to drop
+@param drop_foreign Foreign key constraints to be dropped, or NULL
+@param n_drop_foreign Number of foreign key constraints to drop
+@param add_foreign Foreign key constraints to be added, or NULL
+@param n_add_foreign Number of foreign key constraints to add
+@param fts_doc_id_col The column number of FTS_DOC_ID
+@param add_autoinc_col The number of an added AUTO_INCREMENT column,
+	or ULINT_UNDEFINED if none was added
+@param autoinc_col_max_value The maximum value of the added
+	AUTO_INCREMENT column
+@param add_fts_doc_id Flag: add column FTS_DOC_ID?
+@param add_fts_doc_id_idx Flag: add index (FTS_DOC_ID)?
+
+@retval true Failure
+@retval false Success
+*/
+static __attribute__((warn_unused_result, nonnull(1,2,3,4)))
+bool
+prepare_inplace_alter_table_dict(
+/*=============================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* old_table,
+ dict_table_t* user_table,
+ trx_t* user_trx,
+ const char* table_name,
+ ulint flags,
+ ulint flags2,
+ mem_heap_t* heap,
+ dict_index_t** drop_index,
+ ulint n_drop_index,
+ dict_foreign_t** drop_foreign,
+ ulint n_drop_foreign,
+ dict_foreign_t** add_foreign,
+ ulint n_add_foreign,
+ ulint fts_doc_id_col,
+ ulint add_autoinc_col,
+ ulonglong autoinc_col_max_value,
+ bool add_fts_doc_id,
+ bool add_fts_doc_id_idx)
+{
+ trx_t* trx;
+ bool dict_locked = false;
+ dict_index_t** add_index; /* indexes to be created */
+ ulint* add_key_nums; /* MySQL key numbers */
+ ulint n_add_index;
+ index_def_t* index_defs; /* index definitions */
+ dict_index_t* fts_index = NULL;
+ dict_table_t* indexed_table = user_table;
+ ulint new_clustered = 0;
+ dberr_t error;
+ THD* user_thd = user_trx->mysql_thd;
+ const ulint* col_map = NULL;
+ dtuple_t* add_cols = NULL;
+ ulint num_fts_index;
+
+ DBUG_ENTER("prepare_inplace_alter_table_dict");
+ DBUG_ASSERT((add_autoinc_col != ULINT_UNDEFINED)
+ == (autoinc_col_max_value > 0));
+ DBUG_ASSERT(!n_drop_index == !drop_index);
+ DBUG_ASSERT(!n_drop_foreign == !drop_foreign);
+ DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx);
+ DBUG_ASSERT(!add_fts_doc_id_idx
+ || innobase_fulltext_exist(altered_table->s));
+
+ trx_start_if_not_started_xa(user_trx);
/* Create a background transaction for the operations on
the data dictionary tables. */
trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started(trx);
- /* We don't want this table to be evicted from the cache while we
- are building an index on it. Another issue is that while we are
- building the index this table could be referred to in a foreign
- key relationship. In innobase_add_index_cleanup() we check for
- that condition before moving it back to the LRU list. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- row_mysql_lock_data_dictionary(trx);
-
- if (prebuilt->table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(prebuilt->table);
+ if (!heap) {
+ heap = mem_heap_create(1024);
}
- row_mysql_unlock_data_dictionary(trx);
-
/* Create table containing all indexes to be built in this
- alter table add index so that they are in the correct order
+ ALTER TABLE ADD INDEX so that they are in the correct order
in the table. */
- num_of_idx = num_of_keys;
+ n_add_index = ha_alter_info->index_add_count;
- index_defs = innobase_create_key_def(
- trx, prebuilt->table, heap, key_info, num_of_idx,
- &num_fts_index, &fts_add_doc_id, &fts_add_doc_idx);
+ index_defs = innobase_create_key_defs(
+ heap, ha_alter_info, altered_table, n_add_index,
+ num_fts_index, row_table_got_default_clust_index(indexed_table),
+ fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx);
- if (!index_defs) {
- error = DB_UNSUPPORTED;
- goto error_handling;
- }
+ new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
+
+ const bool locked =
+ !ha_alter_info->online
+ || add_autoinc_col != ULINT_UNDEFINED
+ || num_fts_index > 0
+ || (innobase_need_rebuild(ha_alter_info)
+ && innobase_fulltext_exist(altered_table->s));
- /* Currently, support create one single FULLTEXT index in parallel at
- a time */
if (num_fts_index > 1) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Only support create ONE Fulltext index"
- " at a time\n");
- error = DB_UNSUPPORTED;
- goto error_handling;
+ my_error(ER_INNODB_FT_LIMIT, MYF(0));
+ goto error_handled;
}
- new_primary = DICT_CLUSTERED & index_defs[0].ind_type;
+ if (locked && ha_alter_info->online) {
+ /* This should have been blocked in
+ check_if_supported_inplace_alter(). */
+ ut_ad(0);
+ my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+ thd_query_string(user_thd)->str);
+ goto error_handled;
+ }
- /* If a new FTS Doc ID column is to be added, there will be
- one additional index to be built on the Doc ID column itself. */
- num_idx_create = (fts_add_doc_idx) ? num_of_idx + 1 : num_of_idx;
+ /* The primary index would be rebuilt if a FTS Doc ID
+ column is to be added, and the primary index definition
+ is just copied from old table and stored in indexdefs[0] */
+ DBUG_ASSERT(!add_fts_doc_id || new_clustered);
+ DBUG_ASSERT(!!new_clustered ==
+ (innobase_need_rebuild(ha_alter_info)
+ || add_fts_doc_id));
/* Allocate memory for dictionary index definitions */
- index = (dict_index_t**) mem_heap_alloc(
- heap, num_idx_create * sizeof *index);
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+ add_index = (dict_index_t**) mem_heap_alloc(
+ heap, n_add_index * sizeof *add_index);
+ add_key_nums = (ulint*) mem_heap_alloc(
+ heap, n_add_index * sizeof *add_key_nums);
+
+ /* This transaction should be dictionary operation, so that
+ the data dictionary will be locked during crash recovery. */
+
+ ut_ad(trx->dict_operation == TRX_DICT_OP_INDEX);
/* Acquire a lock on the table before creating any indexes. */
- error = row_merge_lock_table(prebuilt->trx, prebuilt->table,
- new_primary ? LOCK_X : LOCK_S);
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ if (locked) {
+ error = row_merge_lock_table(
+ user_trx, indexed_table, LOCK_S);
- goto error_handling;
+ if (error != DB_SUCCESS) {
+
+ goto error_handling;
+ }
+ } else {
+ error = DB_SUCCESS;
}
/* Latch the InnoDB data dictionary exclusively so that no deadlocks
or lock waits can happen in it during an index create operation. */
row_mysql_lock_data_dictionary(trx);
- dict_locked = TRUE;
+ dict_locked = true;
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+	/* Wait for background stats processing to stop using the table
+	that we are going to alter. We know that background stats will
+	not start using it again while we are holding the data
+	dictionary lock, and we hold that lock here at least until
+	checking ut_ad(user_table->n_ref_count == 1) below.
+	XXX what may happen if bg stats opens the table after we
+	have unlocked the data dictionary below? */
+ dict_stats_wait_bg_to_stop_using_tables(user_table, NULL, trx);
- /* If a new primary key is defined for the table we need
+ online_retry_drop_indexes_low(indexed_table, trx);
+
+ ut_d(dict_table_check_for_dup_indexes(
+ indexed_table, CHECK_ABORTED_OK));
+
+ /* If a new clustered index is defined for the table we need
to drop the original table and rebuild all indexes. */
- if (UNIV_UNLIKELY(new_primary)) {
- /* This transaction should be the only one
- operating on the table. The table get above
- would have incremented the ref count to 2. */
- ut_a(prebuilt->table->n_ref_count == 2);
+ if (new_clustered) {
+ char* new_table_name = dict_mem_create_temporary_tablename(
+ heap, indexed_table->name, indexed_table->id);
+ ulint n_cols;
- char* new_table_name = innobase_create_temporary_tablename(
- heap, '1', prebuilt->table->name);
+ if (innobase_check_foreigns(
+ ha_alter_info, altered_table, old_table,
+ user_table, drop_foreign, n_drop_foreign)) {
+ goto new_clustered_failed;
+ }
- /* Clone the table. */
+ n_cols = altered_table->s->fields;
+
+ if (add_fts_doc_id) {
+ n_cols++;
+ DBUG_ASSERT(flags2 & DICT_TF2_FTS);
+ DBUG_ASSERT(add_fts_doc_id_idx);
+ flags2 |= DICT_TF2_FTS_ADD_DOC_ID
+ | DICT_TF2_FTS_HAS_DOC_ID
+ | DICT_TF2_FTS;
+ }
+
+ DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS));
+
+ /* Create the table. */
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- indexed_table = row_merge_create_temporary_table(
- new_table_name, index_defs, prebuilt->table, trx);
- if (!indexed_table) {
+ if (dict_table_get_low(new_table_name)) {
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
+ new_table_name);
+ goto new_clustered_failed;
+ }
- switch (trx->error_state) {
- case DB_TABLESPACE_ALREADY_EXISTS:
- case DB_DUPLICATE_KEY:
- innobase_convert_tablename(new_table_name);
- my_error(HA_ERR_TABLE_EXIST, MYF(0),
- new_table_name);
- error = HA_ERR_TABLE_EXIST;
- break;
- default:
- error = convert_error_code_to_mysql(
- trx->error_state,
- prebuilt->table->flags,
- user_thd);
+ /* The initial space id 0 may be overridden later. */
+ indexed_table = dict_mem_table_create(
+ new_table_name, 0, n_cols, flags, flags2);
+
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ indexed_table->data_dir_path =
+ mem_heap_strdup(indexed_table->heap,
+ user_table->data_dir_path);
+ }
+
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ const Field* field = altered_table->field[i];
+ ulint is_unsigned;
+ ulint field_type
+ = (ulint) field->type();
+ ulint col_type
+ = get_innobase_type_from_mysql_type(
+ &is_unsigned, field);
+ ulint charset_no;
+ ulint col_len;
+
+ /* we assume in dtype_form_prtype() that this
+ fits in two bytes */
+ ut_a(field_type <= MAX_CHAR_COLL_NUM);
+
+ if (!field->real_maybe_null()) {
+ field_type |= DATA_NOT_NULL;
+ }
+
+ if (field->binary()) {
+ field_type |= DATA_BINARY_TYPE;
+ }
+
+ if (is_unsigned) {
+ field_type |= DATA_UNSIGNED;
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table,
- TRUE));
- row_mysql_unlock_data_dictionary(trx);
- mem_heap_free(heap);
+ if (dtype_is_string_type(col_type)) {
+ charset_no = (ulint) field->charset()->number;
- innobase_add_index_cleanup(
- prebuilt, trx, prebuilt->table);
+ if (charset_no > MAX_CHAR_COLL_NUM) {
+ dict_mem_table_free(indexed_table);
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ field->field_name);
+ goto new_clustered_failed;
+ }
+ } else {
+ charset_no = 0;
+ }
+
+ col_len = field->pack_length();
+
+ /* The MySQL pack length contains 1 or 2 bytes
+ length field for a true VARCHAR. Let us
+ subtract that, so that the InnoDB column
+ length in the InnoDB data dictionary is the
+ real maximum byte length of the actual data. */
+
+ if (field->type() == MYSQL_TYPE_VARCHAR) {
+ uint32 length_bytes
+ = static_cast<const Field_varstring*>(
+ field)->length_bytes;
+
+ col_len -= length_bytes;
+
+ if (length_bytes == 2) {
+ field_type |= DATA_LONG_TRUE_VARCHAR;
+ }
+ }
- DBUG_RETURN(error);
+ if (dict_col_name_is_reserved(field->field_name)) {
+ dict_mem_table_free(indexed_table);
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ field->field_name);
+ goto new_clustered_failed;
+ }
+
+ dict_mem_table_add_col(
+ indexed_table, heap,
+ field->field_name,
+ col_type,
+ dtype_form_prtype(field_type, charset_no),
+ col_len);
+ }
+
+ if (add_fts_doc_id) {
+ fts_add_doc_id_column(indexed_table, heap);
+ indexed_table->fts->doc_col = fts_doc_id_col;
+ ut_ad(fts_doc_id_col == altered_table->s->fields);
+ } else if (indexed_table->fts) {
+ indexed_table->fts->doc_col = fts_doc_id_col;
}
- trx->table_id = indexed_table->id;
+ error = row_create_table_for_mysql(indexed_table, trx, false);
+
+ switch (error) {
+ dict_table_t* temp_table;
+ case DB_SUCCESS:
+ /* We need to bump up the table ref count and
+ before we can use it we need to open the
+ table. The new_table must be in the data
+ dictionary cache, because we are still holding
+ the dict_sys->mutex. */
+ ut_ad(mutex_own(&dict_sys->mutex));
+ temp_table = dict_table_open_on_name(
+ indexed_table->name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+ ut_a(indexed_table == temp_table);
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ DBUG_ASSERT(indexed_table->n_ref_count == 1);
+ break;
+ case DB_TABLESPACE_EXISTS:
+ my_error(ER_TABLESPACE_EXISTS, MYF(0),
+ new_table_name);
+ goto new_clustered_failed;
+ case DB_DUPLICATE_KEY:
+ my_error(HA_ERR_TABLE_EXIST, MYF(0),
+ altered_table->s->table_name.str);
+ goto new_clustered_failed;
+ default:
+ my_error_innodb(error, table_name, flags);
+ new_clustered_failed:
+ DBUG_ASSERT(trx != user_trx);
+ trx_rollback_to_savepoint(trx, NULL);
+
+ ut_ad(user_table->n_ref_count == 1);
+
+ online_retry_drop_indexes_with_trx(user_table, trx);
+
+ goto err_exit;
+ }
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN) {
+
+ add_cols = dtuple_create(
+ heap, dict_table_get_n_cols(indexed_table));
+
+ dict_table_copy_types(add_cols, indexed_table);
+ }
+
+ col_map = innobase_build_col_map(
+ ha_alter_info, altered_table, old_table,
+ indexed_table, user_table,
+ add_cols, heap);
+ } else {
+ DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info));
+
+ if (!indexed_table->fts
+ && innobase_fulltext_exist(altered_table->s)) {
+ indexed_table->fts = fts_create(indexed_table);
+ indexed_table->fts->doc_col = fts_doc_id_col;
+ }
}
+ /* Assign table_id, so that no table id of
+ fts_create_index_tables() will be written to the undo logs. */
+ DBUG_ASSERT(indexed_table->id != 0);
+ trx->table_id = indexed_table->id;
+
/* Create the indexes in SYS_INDEXES and load into dictionary. */
- for (num_created = 0; num_created < num_of_idx; num_created++) {
+ for (ulint num_created = 0; num_created < n_add_index; num_created++) {
- index[num_created] = row_merge_create_index(
+ add_index[num_created] = row_merge_create_index(
trx, indexed_table, &index_defs[num_created]);
- if (!index[num_created]) {
+ add_key_nums[num_created] = index_defs[num_created].key_number;
+
+ if (!add_index[num_created]) {
error = trx->error_state;
+ DBUG_ASSERT(error != DB_SUCCESS);
goto error_handling;
}
- if (index[num_created]->type & DICT_FTS) {
- fts_index = index[num_created];
- fts_create_index_tables(trx, fts_index);
+ if (add_index[num_created]->type & DICT_FTS) {
+ DBUG_ASSERT(num_fts_index);
+ DBUG_ASSERT(!fts_index);
+ DBUG_ASSERT(add_index[num_created]->type == DICT_FTS);
+ fts_index = add_index[num_created];
+ }
+ /* If only online ALTER TABLE operations have been
+ requested, allocate a modification log. If the table
+ will be locked anyway, the modification
+ log is unnecessary. When rebuilding the table
+ (new_clustered), we will allocate the log for the
+ clustered index of the old table, later. */
+ if (new_clustered
+ || locked
+ || user_table->ibd_file_missing
+ || dict_table_is_discarded(user_table)) {
+ /* No need to allocate a modification log. */
+ ut_ad(!add_index[num_created]->online_log);
+ } else if (add_index[num_created]->type & DICT_FTS) {
+ /* Fulltext indexes are not covered
+ by a modification log. */
+ } else {
+ DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;);
+ rw_lock_x_lock(&add_index[num_created]->lock);
+ bool ok = row_log_allocate(add_index[num_created],
+ NULL, true, NULL, NULL);
+ rw_lock_x_unlock(&add_index[num_created]->lock);
+
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;
+ }
}
}
- /* create FTS_DOC_ID_INDEX on the Doc ID column on the table */
- if (fts_add_doc_idx) {
- index[num_of_idx] = innobase_create_fts_doc_id_idx(
- indexed_table, trx, heap);
- /* FTS_DOC_ID_INDEX is internal defined new index */
- num_of_idx++;
- num_created++;
+ ut_ad(new_clustered == (indexed_table != user_table));
+
+ DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;);
+
+ if (new_clustered && !locked) {
+ /* Allocate a log for online table rebuild. */
+ dict_index_t* clust_index = dict_table_get_first_index(
+ user_table);
+
+ rw_lock_x_lock(&clust_index->lock);
+ bool ok = row_log_allocate(
+ clust_index, indexed_table,
+ !(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX),
+ add_cols, col_map);
+ rw_lock_x_unlock(&clust_index->lock);
+
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;
+ }
+
+ /* Assign a consistent read view for
+ row_merge_read_clustered_index(). */
+ trx_assign_read_view(user_trx);
}
- if (num_fts_index) {
+ if (fts_index) {
+ /* Ensure that the dictionary operation mode will
+ not change while creating the auxiliary tables. */
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+#ifdef UNIV_DEBUG
+ switch (op) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
+ goto op_ok;
+ }
+ ut_error;
+op_ok:
+#endif /* UNIV_DEBUG */
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
DICT_TF2_FLAG_SET(indexed_table, DICT_TF2_FTS);
+ /* This function will commit the transaction and reset
+ the trx_t::dict_operation flag on success. */
+
+ error = fts_create_index_tables(trx, fts_index);
+
+ DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table",
+ error = DB_LOCK_WAIT_TIMEOUT;
+ goto error_handling;);
+
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
+
+ trx_start_for_ddl(trx, op);
+
if (!indexed_table->fts
|| ib_vector_size(indexed_table->fts->indexes) == 0) {
- fts_create_common_tables(trx, indexed_table,
- prebuilt->table->name, TRUE);
+ error = fts_create_common_tables(
+ trx, indexed_table, user_table->name, TRUE);
+
+ DBUG_EXECUTE_IF("innodb_test_fail_after_fts_common_table",
+ error = DB_LOCK_WAIT_TIMEOUT;
+ goto error_handling;);
+
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
indexed_table->fts->fts_status |= TABLE_DICT_LOCKED;
- innobase_fts_load_stopword(
- indexed_table, trx, ha_thd());
+
+ error = innobase_fts_load_stopword(
+ indexed_table, trx, user_thd)
+ ? DB_SUCCESS : DB_ERROR;
indexed_table->fts->fts_status &= ~TABLE_DICT_LOCKED;
- }
- if (new_primary && prebuilt->table->fts) {
- indexed_table->fts->doc_col = prebuilt->table->fts->doc_col;
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
}
+
+ ut_ad(trx_get_dict_operation(trx) == op);
}
- ut_ad(error == DB_SUCCESS);
+ DBUG_ASSERT(error == DB_SUCCESS);
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
@@ -1278,633 +3000,2212 @@ ha_innobase::add_index(
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
- dict_locked = FALSE;
+ dict_locked = false;
ut_a(trx->lock.n_active_thrs == 0);
- if (UNIV_UNLIKELY(new_primary)) {
- /* A primary key is to be built. Acquire an exclusive
- table lock also on the table that is being created. */
- ut_ad(indexed_table != prebuilt->table);
-
- error = row_merge_lock_table(prebuilt->trx, indexed_table,
- LOCK_X);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+error_handling:
+ /* After an error, remove all those index definitions from the
+ dictionary which were defined. */
- goto error_handling;
- }
+ switch (error) {
+ case DB_SUCCESS:
+ ut_a(!dict_locked);
+
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ user_table, CHECK_PARTIAL_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
+ ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
+ user_trx, add_index, add_key_nums, n_add_index,
+ drop_index, n_drop_index,
+ drop_foreign, n_drop_foreign,
+ add_foreign, n_add_foreign,
+ !locked, heap, trx, indexed_table, col_map,
+ add_autoinc_col,
+ ha_alter_info->create_info->auto_increment_value,
+ autoinc_col_max_value,
+ add_cols);
+ DBUG_RETURN(false);
+ case DB_TABLESPACE_EXISTS:
+ my_error(ER_TABLESPACE_EXISTS, MYF(0), "(unknown)");
+ break;
+ case DB_DUPLICATE_KEY:
+ my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES");
+ break;
+ default:
+ my_error_innodb(error, table_name, user_table->flags);
}
- /* Read the clustered index of the table and build indexes
- based on this information using temporary files and merge sort. */
- error = row_merge_build_indexes(prebuilt->trx,
- prebuilt->table, indexed_table,
- index, num_of_idx, table);
+error_handled:
-error_handling:
-
- /* After an error, remove all those index definitions from the
- dictionary which were defined. */
+ user_trx->error_info = NULL;
+ trx->error_state = DB_SUCCESS;
if (!dict_locked) {
row_mysql_lock_data_dictionary(trx);
- dict_locked = TRUE;
}
- switch (error) {
- case DB_SUCCESS:
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ if (new_clustered) {
+ if (indexed_table != user_table) {
- *add = new ha_innobase_add_index(
- table, key_info, num_of_keys, indexed_table);
+ if (DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS)) {
+ innobase_drop_fts_index_table(
+ indexed_table, trx);
+ }
- dict_table_close(prebuilt->table, dict_locked);
- break;
+ dict_table_close(indexed_table, TRUE, FALSE);
- case DB_TOO_BIG_RECORD:
- my_error(HA_ERR_TO_BIG_ROW, MYF(0));
- goto error_exit;
- case DB_PRIMARY_KEY_IS_NULL:
- my_error(ER_PRIMARY_CANT_HAVE_NULL, MYF(0));
- /* fall through */
- case DB_DUPLICATE_KEY:
- if (fts_add_doc_idx
- && prebuilt->trx->error_key_num == num_of_idx - 1) {
- prebuilt->trx->error_key_num = ULINT_UNDEFINED;
- }
-error_exit:
- prebuilt->trx->error_info = NULL;
- /* fall through */
- default:
- dict_table_close(prebuilt->table, dict_locked);
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
- trx->error_state = DB_SUCCESS;
+ row_merge_drop_table(trx, indexed_table);
- if (new_primary) {
- if (indexed_table != prebuilt->table) {
- dict_table_close(indexed_table, dict_locked);
- row_merge_drop_table(trx, indexed_table);
+ /* Free the log for online table rebuild, if
+ one was allocated. */
+
+ dict_index_t* clust_index = dict_table_get_first_index(
+ user_table);
+
+ rw_lock_x_lock(&clust_index->lock);
+
+ if (clust_index->online_log) {
+ ut_ad(!locked);
+ row_log_abort_sec(clust_index);
+ clust_index->online_status
+ = ONLINE_INDEX_COMPLETE;
}
- } else {
- row_merge_drop_indexes(trx, indexed_table,
- index, num_created);
+
+ rw_lock_x_unlock(&clust_index->lock);
}
+
+ trx_commit_for_mysql(trx);
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ DBUG_ASSERT(user_table->n_ref_count == 1 || !locked);
+
+ online_retry_drop_indexes_with_trx(user_table, trx);
+ } else {
+ ut_ad(indexed_table == user_table);
+ row_merge_drop_indexes(trx, user_table, TRUE);
+ trx_commit_for_mysql(trx);
+ }
+
+ ut_d(dict_table_check_for_dup_indexes(user_table, CHECK_ALL_COMPLETE));
+ ut_ad(!user_table->drop_aborted);
+
+err_exit:
+ /* Clear the to_be_dropped flag in the data dictionary cache. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ DBUG_ASSERT(*drop_index[i]->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(drop_index[i]->to_be_dropped);
+ drop_index[i]->to_be_dropped = 0;
}
- ut_ad(!new_primary || prebuilt->table->n_ref_count == 1);
- trx_commit_for_mysql(trx);
- ut_ad(dict_locked);
row_mysql_unlock_data_dictionary(trx);
+
trx_free_for_mysql(trx);
mem_heap_free(heap);
- if (prebuilt->trx) {
- trx_commit_for_mysql(prebuilt->trx);
- }
+ trx_commit_for_mysql(user_trx);
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
- DBUG_RETURN(convert_error_code_to_mysql(error, prebuilt->table->flags,
- user_thd));
+ DBUG_RETURN(true);
}
-/*******************************************************************//**
-Finalize or undo add_index().
-@return 0 or error number */
+/* Check whether an index is needed for the foreign key constraint.
+If so, if it is dropped, is there an equivalent index can play its role.
+@return true if the index is needed and can't be dropped */
+static __attribute__((warn_unused_result))
+bool
+innobase_check_foreign_key_index(
+/*=============================*/
+ Alter_inplace_info* ha_alter_info, /*!< in: Structure describing
+ changes to be done by ALTER
+ TABLE */
+ dict_index_t* index, /*!< in: index to check */
+ dict_table_t* indexed_table, /*!< in: table that owns the
+ foreign keys */
+ trx_t* trx, /*!< in/out: transaction */
+ dict_foreign_t** drop_fk, /*!< in: Foreign key constraints
+ to drop */
+ ulint n_drop_fk) /*!< in: Number of foreign keys
+ to drop */
+{
+ dict_foreign_t* foreign;
+
+ ut_ad(!index->to_be_dropped);
+
+ /* Check if the index is referenced. */
+ foreign = dict_table_get_referenced_constraint(indexed_table, index);
+
+ ut_ad(!foreign || indexed_table
+ == foreign->referenced_table);
+
+ if (foreign
+ && !dict_foreign_find_index(
+ indexed_table,
+ foreign->referenced_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE,
+ /*check_null=*/FALSE)
+ && !innobase_find_equiv_index(
+ foreign->referenced_col_names,
+ foreign->n_fields,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)
+ ) {
+ trx->error_info = index;
+ return(true);
+ }
+
+ /* Check if this index references some
+ other table */
+ foreign = dict_table_get_foreign_constraint(
+ indexed_table, index);
+
+ ut_ad(!foreign || indexed_table
+ == foreign->foreign_table);
+
+ if (foreign
+ && !innobase_dropping_foreign(
+ foreign, drop_fk, n_drop_fk)
+ && !dict_foreign_find_index(
+ indexed_table,
+ foreign->foreign_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE,
+ /*check_null=*/FALSE)
+ && !innobase_find_equiv_index(
+ foreign->foreign_col_names,
+ foreign->n_fields,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)
+ ) {
+ trx->error_info = index;
+ return(true);
+ }
+
+ return(false);
+}
+
+/** Allows InnoDB to update internal structures with concurrent
+writes blocked (provided that check_if_supported_inplace_alter()
+did not return HA_ALTER_INPLACE_NO_LOCK).
+This will be invoked before inplace_alter_table().
+
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::final_add_index(
-/*=========================*/
- handler_add_index* add_arg,/*!< in: context from add_index() */
- bool commit) /*!< in: true=commit, false=rollback */
+bool
+ha_innobase::prepare_inplace_alter_table(
+/*=====================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
{
- ha_innobase_add_index* add;
- trx_t* trx;
- int err = 0;
+ dict_index_t** drop_index; /*!< Index to be dropped */
+ ulint n_drop_index; /*!< Number of indexes to drop */
+ dict_foreign_t**drop_fk; /*!< Foreign key constraints to drop */
+ ulint n_drop_fk; /*!< Number of foreign keys to drop */
+ dict_foreign_t**add_fk = NULL; /*!< Foreign key constraints to drop */
+ ulint n_add_fk; /*!< Number of foreign keys to drop */
+ dict_table_t* indexed_table; /*!< Table where indexes are created */
+ mem_heap_t* heap;
+ int error;
+ ulint flags;
+ ulint flags2;
+ ulint max_col_len;
+ ulint add_autoinc_col_no = ULINT_UNDEFINED;
+ ulonglong autoinc_col_max_value = 0;
+ ulint fts_doc_col_no = ULINT_UNDEFINED;
+ bool add_fts_doc_id = false;
+ bool add_fts_doc_id_idx = false;
+
+ DBUG_ENTER("prepare_inplace_alter_table");
+ DBUG_ASSERT(!ha_alter_info->handler_ctx);
+ DBUG_ASSERT(ha_alter_info->create_info);
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(false);
+ }
- DBUG_ENTER("ha_innobase::final_add_index");
+ MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE);
- ut_ad(add_arg);
- add = static_cast<class ha_innobase_add_index*>(add_arg);
+#ifdef UNIV_DEBUG
+ for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ ut_ad(!index->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started(trx);
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ABORTED_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ /* Nothing to do */
+ goto func_exit;
+ }
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during an index create operation. */
- row_mysql_lock_data_dictionary(trx);
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !innobase_need_rebuild(ha_alter_info)) {
+ goto func_exit;
+ }
- if (add->indexed_table != prebuilt->table) {
- ulint error;
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ if (const char* invalid_opt = create_options_are_invalid(
+ user_thd, altered_table,
+ ha_alter_info->create_info,
+ prebuilt->table->space != 0)) {
+ my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
+ table_type(), invalid_opt);
+ goto err_exit_no_heap;
+ }
+ }
- /* We copied the table (new_primary). */
- if (commit) {
- mem_heap_t* heap;
- char* tmp_name;
+ /* Check if any index name is reserved. */
+ if (innobase_index_name_is_reserved(
+ user_thd,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->key_count)) {
+err_exit_no_heap:
+ DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ online_retry_drop_indexes(prebuilt->table, user_thd);
+ }
+ DBUG_RETURN(true);
+ }
- heap = mem_heap_create(1024);
+ indexed_table = prebuilt->table;
- /* A new primary key was defined for the table
- and there was no error at this point. We can
- now rename the old table as a temporary table,
- rename the new temporary table as the old
- table and drop the old table. */
- tmp_name = innobase_create_temporary_tablename(
- heap, '2', prebuilt->table->name);
+ /* Check that index keys are sensible */
+ error = innobase_check_index_keys(ha_alter_info, indexed_table);
- error = row_merge_rename_tables(
- prebuilt->table, add->indexed_table,
- tmp_name, trx);
+ if (error) {
+ goto err_exit_no_heap;
+ }
- ut_a(prebuilt->table->n_ref_count == 1);
+ /* Prohibit renaming a column to something that the table
+ already contains. */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME) {
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
- switch (error) {
- case DB_TABLESPACE_ALREADY_EXISTS:
- case DB_DUPLICATE_KEY:
- ut_a(add->indexed_table->n_ref_count == 0);
- innobase_convert_tablename(tmp_name);
- my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
- err = HA_ERR_TABLE_EXIST;
- break;
- default:
- err = convert_error_code_to_mysql(
- error, prebuilt->table->flags,
- user_thd);
- break;
+ for (Field** fp = table->field; *fp; fp++) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ continue;
}
- mem_heap_free(heap);
+ const char* name = 0;
+
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ name = cf->field_name;
+ goto check_if_ok_to_rename;
+ }
+ }
+
+ ut_error;
+check_if_ok_to_rename:
+ /* Prohibit renaming a column from FTS_DOC_ID
+ if full-text indexes exist. */
+ if (!my_strcasecmp(system_charset_info,
+ (*fp)->field_name,
+ FTS_DOC_ID_COL_NAME)
+ && innobase_fulltext_exist(altered_table->s)) {
+ my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN,
+ MYF(0), name);
+ goto err_exit_no_heap;
+ }
+
+ /* Prohibit renaming a column to an internal column. */
+ const char* s = prebuilt->table->col_names;
+ unsigned j;
+ /* Skip user columns.
+ MySQL should have checked these already.
+ We want to allow renaming of c1 to c2, c2 to c1. */
+ for (j = 0; j < table->s->fields; j++) {
+ s += strlen(s) + 1;
+ }
+
+ for (; j < prebuilt->table->n_def; j++) {
+ if (!my_strcasecmp(
+ system_charset_info, name, s)) {
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ s);
+ goto err_exit_no_heap;
+ }
+
+ s += strlen(s) + 1;
+ }
}
+ }
- if (!commit || err) {
- dict_table_close(add->indexed_table, TRUE);
- error = row_merge_drop_table(trx, add->indexed_table);
- trx_commit_for_mysql(prebuilt->trx);
- } else {
- dict_table_t* old_table = prebuilt->table;
- trx_commit_for_mysql(prebuilt->trx);
- row_prebuilt_free(prebuilt, TRUE);
- error = row_merge_drop_table(trx, old_table);
- prebuilt = row_create_prebuilt(add->indexed_table,
- 0 /* XXX Do we know the mysql_row_len here?
- Before the addition of this parameter to
- row_create_prebuilt() the mysql_row_len
- member was left 0 (from zalloc) in the
- prebuilt object. */);
+ if (!innobase_table_flags(altered_table,
+ ha_alter_info->create_info,
+ user_thd,
+ srv_file_per_table
+ || indexed_table->space != 0,
+ &flags, &flags2)) {
+ goto err_exit_no_heap;
+ }
+
+ max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
+
+ /* Check each index's column length to make sure they do not
+ exceed limit */
+ for (ulint i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY* key = &ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[i]];
+
+ if (key->flags & HA_FULLTEXT) {
+ /* The column length does not matter for
+ fulltext search indexes. But, UNIQUE
+ fulltext indexes are not supported. */
+ DBUG_ASSERT(!(key->flags & HA_NOSAME));
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_BINARY_PACK_KEY)));
+ continue;
}
- err = convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd);
+ if (innobase_check_column_length(max_col_len, key)) {
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ max_col_len);
+ goto err_exit_no_heap;
+ }
}
- if (add->indexed_table == prebuilt->table
- || DICT_TF2_FLAG_IS_SET(prebuilt->table, DICT_TF2_FTS_ADD_DOC_ID)) {
- /* We created secondary indexes (!new_primary) or create full
- text index and added a new Doc ID column, we will need to
- rename the secondary index on the Doc ID column to its
- official index name.. */
+ /* Check existing index definitions for too-long column
+ prefixes as well, in case max_col_len shrunk. */
+ for (const dict_index_t* index
+ = dict_table_get_first_index(indexed_table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS
+ || (index->type & DICT_CORRUPT));
+ continue;
+ }
- if (commit) {
- err = convert_error_code_to_mysql(
- row_merge_rename_indexes(trx, prebuilt->table),
- prebuilt->table->flags, user_thd);
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ if (field->prefix_len > max_col_len) {
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ max_col_len);
+ goto err_exit_no_heap;
+ }
}
+ }
+
+ n_drop_index = 0;
+ n_drop_fk = 0;
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY) {
+ DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0);
- if (!commit || err) {
- dict_index_t* index;
- dict_index_t* next_index;
+ heap = mem_heap_create(1024);
- for (index = dict_table_get_first_index(
- prebuilt->table);
- index; index = next_index) {
+ drop_fk = static_cast<dict_foreign_t**>(
+ mem_heap_alloc(
+ heap,
+ ha_alter_info->alter_info->drop_list.elements
+ * sizeof(dict_foreign_t*)));
- next_index = dict_table_get_next_index(index);
+ List_iterator<Alter_drop> drop_it(
+ ha_alter_info->alter_info->drop_list);
- if (*index->name == TEMP_INDEX_PREFIX) {
- row_merge_drop_index(
- index, prebuilt->table, trx);
+ while (Alter_drop* drop = drop_it++) {
+ if (drop->type != Alter_drop::FOREIGN_KEY) {
+ continue;
+ }
+
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(
+ foreign_list, foreign)) {
+ const char* fid = strchr(foreign->id, '/');
+
+ DBUG_ASSERT(fid);
+ /* If no database/ prefix was present in
+ the FOREIGN KEY constraint name, compare
+ to the full constraint name. */
+ fid = fid ? fid + 1 : foreign->id;
+
+ if (!my_strcasecmp(system_charset_info,
+ fid, drop->name)) {
+ drop_fk[n_drop_fk++] = foreign;
+ goto found_fk;
}
}
+
+ my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0),
+ drop->name);
+ goto err_exit;
+found_fk:
+ continue;
}
- DICT_TF2_FLAG_UNSET(prebuilt->table, DICT_TF2_FTS_ADD_DOC_ID);
+ DBUG_ASSERT(n_drop_fk > 0);
+ DBUG_ASSERT(n_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+ } else {
+ drop_fk = NULL;
+ heap = NULL;
}
- /* If index is successfully built, we will need to rebuild index
- translation table. Set valid index entry count in the translation
- table to zero. */
- if (err == 0 && commit) {
- ibool new_primary;
- dict_index_t* index;
- dict_index_t* next_index;
- ibool new_fts = FALSE;
- dict_index_t* primary;
+ if (ha_alter_info->index_drop_count) {
+ dict_index_t* drop_primary = NULL;
- new_primary = !my_strcasecmp(
- system_charset_info, add->key_info[0].name, "PRIMARY");
-
- primary = dict_table_get_first_index(add->indexed_table);
-
- if (!new_primary) {
- new_primary = !my_strcasecmp(
- system_charset_info, add->key_info[0].name,
- primary->name);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & (Alter_inplace_info::DROP_INDEX
+ | Alter_inplace_info::DROP_UNIQUE_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX));
+ /* Check which indexes to drop. */
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+ drop_index = static_cast<dict_index_t**>(
+ mem_heap_alloc(
+ heap, (ha_alter_info->index_drop_count + 1)
+ * sizeof *drop_index));
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY* key
+ = ha_alter_info->index_drop_buffer[i];
+ dict_index_t* index
+ = dict_table_get_index_on_name_and_min_id(
+ indexed_table, key->name);
+
+ if (!index) {
+ push_warning_printf(
+ user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_INDEX,
+ "InnoDB could not find key "
+ "with name %s", key->name);
+ } else {
+ ut_ad(!index->to_be_dropped);
+ if (!dict_index_is_clust(index)) {
+ drop_index[n_drop_index++] = index;
+ } else {
+ drop_primary = index;
+ }
+ }
}
- share->idx_trans_tbl.index_count = 0;
+ /* If all FULLTEXT indexes were removed, drop an
+ internal FTS_DOC_ID_INDEX as well, unless it exists in
+ the table. */
+
+ if (innobase_fulltext_exist(table->s)
+ && !innobase_fulltext_exist(altered_table->s)
+ && !DICT_TF2_FLAG_IS_SET(
+ indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) {
+ dict_index_t* fts_doc_index
+ = dict_table_get_index_on_name(
+ indexed_table, FTS_DOC_ID_INDEX_NAME);
+
+ // Add some fault tolerance for non-debug builds.
+ if (fts_doc_index == NULL) {
+ goto check_if_can_drop_indexes;
+ }
- if (new_primary) {
- for (index = primary; index; index = next_index) {
+ DBUG_ASSERT(!fts_doc_index->to_be_dropped);
+
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (!my_strcasecmp(
+ system_charset_info,
+ FTS_DOC_ID_INDEX_NAME,
+ table->s->key_info[i].name)) {
+ /* The index exists in the MySQL
+ data dictionary. Do not drop it,
+ even though it is no longer needed
+ by InnoDB fulltext search. */
+ goto check_if_can_drop_indexes;
+ }
+ }
- next_index = dict_table_get_next_index(index);
+ drop_index[n_drop_index++] = fts_doc_index;
+ }
- if (index->type & DICT_FTS) {
- fts_add_index(index,
- add->indexed_table);
- new_fts = TRUE;
+check_if_can_drop_indexes:
+ /* Check if the indexes can be dropped. */
+
+ /* Prevent a race condition between DROP INDEX and
+ CREATE TABLE adding FOREIGN KEY constraints. */
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ if (prebuilt->trx->check_foreigns) {
+ for (uint i = 0; i < n_drop_index; i++) {
+ dict_index_t* index = drop_index[i];
+
+ if (innobase_check_foreign_key_index(
+ ha_alter_info, index, indexed_table,
+ prebuilt->trx, drop_fk, n_drop_fk)) {
+ row_mysql_unlock_data_dictionary(
+ prebuilt->trx);
+ prebuilt->trx->error_info = index;
+ print_error(HA_ERR_DROP_INDEX_FK,
+ MYF(0));
+ goto err_exit;
}
}
+
+ /* If a primary index is dropped, need to check
+ any depending foreign constraints get affected */
+ if (drop_primary
+ && innobase_check_foreign_key_index(
+ ha_alter_info, drop_primary, indexed_table,
+ prebuilt->trx, drop_fk, n_drop_fk)) {
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ print_error(HA_ERR_DROP_INDEX_FK, MYF(0));
+ goto err_exit;
+ }
+ }
+
+ if (!n_drop_index) {
+ drop_index = NULL;
} else {
- ulint i;
- for (i = 0; i < add->num_of_keys; i++) {
- if (add->key_info[i].flags & HA_FULLTEXT) {
- dict_index_t* fts_index;
-
- fts_index =
- dict_table_get_index_on_name(
- prebuilt->table,
- add->key_info[i].name);
-
- ut_ad(fts_index);
- fts_add_index(fts_index,
- prebuilt->table);
- new_fts = TRUE;
+ /* Flag all indexes that are to be dropped. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ ut_ad(!drop_index[i]->to_be_dropped);
+ drop_index[i]->to_be_dropped = 1;
+ }
+ }
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ } else {
+ drop_index = NULL;
+ }
+
+ n_add_fk = 0;
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY) {
+ ut_ad(!prebuilt->trx->check_foreigns);
+
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+
+ add_fk = static_cast<dict_foreign_t**>(
+ mem_heap_zalloc(
+ heap,
+ ha_alter_info->alter_info->key_list.elements
+ * sizeof(dict_foreign_t*)));
+
+ if (!innobase_get_foreign_key_info(
+ ha_alter_info, table_share, prebuilt->table,
+ add_fk, &n_add_fk, heap, prebuilt->trx)) {
+err_exit:
+ if (n_drop_index) {
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ /* Clear the to_be_dropped flags, which might
+ have been set at this point. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ DBUG_ASSERT(*drop_index[i]->name
+ != TEMP_INDEX_PREFIX);
+ drop_index[i]->to_be_dropped = 0;
}
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
}
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ goto err_exit_no_heap;
}
+ }
- if (new_fts) {
- fts_optimize_add_table(prebuilt->table);
+ if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ if (heap) {
+ ha_alter_info->handler_ctx
+ = new ha_innobase_inplace_ctx(
+ prebuilt->trx, 0, 0, 0,
+ drop_index, n_drop_index,
+ drop_fk, n_drop_fk,
+ add_fk, n_add_fk,
+ ha_alter_info->online,
+ heap, 0, indexed_table, 0,
+ ULINT_UNDEFINED, 0, 0, 0);
}
+
+func_exit:
+ DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ online_retry_drop_indexes(prebuilt->table, user_thd);
+ }
+ DBUG_RETURN(false);
}
- trx_commit_for_mysql(trx);
- if (prebuilt->trx) {
- trx_commit_for_mysql(prebuilt->trx);
+ /* If we are to build a full-text search index, check whether
+ the table already has a DOC ID column. If not, we will need to
+ add a Doc ID hidden column and rebuild the primary index */
+ if (innobase_fulltext_exist(altered_table->s)) {
+ ulint doc_col_no;
+
+ if (!innobase_fts_check_doc_id_col(
+ prebuilt->table, altered_table, &fts_doc_col_no)) {
+ fts_doc_col_no = altered_table->s->fields;
+ add_fts_doc_id = true;
+ add_fts_doc_id_idx = true;
+
+ push_warning_printf(
+ user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_INDEX,
+ "InnoDB rebuilding table to add column "
+ FTS_DOC_ID_COL_NAME);
+ } else if (fts_doc_col_no == ULINT_UNDEFINED) {
+ goto err_exit;
+ }
+
+ switch (innobase_fts_check_doc_id_index(
+ prebuilt->table, altered_table, &doc_col_no)) {
+ case FTS_NOT_EXIST_DOC_ID_INDEX:
+ add_fts_doc_id_idx = true;
+ break;
+ case FTS_INCORRECT_DOC_ID_INDEX:
+ my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
+ FTS_DOC_ID_INDEX_NAME);
+ goto err_exit;
+ case FTS_EXIST_DOC_ID_INDEX:
+ DBUG_ASSERT(doc_col_no == fts_doc_col_no
+ || doc_col_no == ULINT_UNDEFINED
+ || (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ALTER_COLUMN_ORDER
+ | Alter_inplace_info::DROP_COLUMN
+ | Alter_inplace_info::ADD_COLUMN)));
+ }
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ /* See if an AUTO_INCREMENT column was added. */
+ uint i = 0;
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ while (const Create_field* new_field = cf_it++) {
+ const Field* field;
- ut_a(fts_check_cached_index(prebuilt->table));
+ DBUG_ASSERT(i < altered_table->s->fields);
- row_mysql_unlock_data_dictionary(trx);
+ for (uint old_i = 0; table->field[old_i]; old_i++) {
+ if (new_field->field == table->field[old_i]) {
+ goto found_col;
+ }
+ }
- trx_free_for_mysql(trx);
+ /* This is an added column. */
+ DBUG_ASSERT(!new_field->field);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN);
- /* There might be work for utility threads.*/
- srv_active_wake_master_thread();
+ field = altered_table->field[i];
- delete add;
- DBUG_RETURN(err);
+ DBUG_ASSERT((MTYP_TYPENR(field->unireg_check)
+ == Field::NEXT_NUMBER)
+ == !!(field->flags & AUTO_INCREMENT_FLAG));
+
+ if (field->flags & AUTO_INCREMENT_FLAG) {
+ if (add_autoinc_col_no != ULINT_UNDEFINED) {
+ /* This should have been blocked earlier. */
+ ut_ad(0);
+ my_error(ER_WRONG_AUTO_KEY, MYF(0));
+ goto err_exit;
+ }
+ add_autoinc_col_no = i;
+
+ autoinc_col_max_value = innobase_get_int_col_max_value(
+ field);
+ }
+found_col:
+ i++;
+ }
+
+ DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd);
+ DBUG_RETURN(prepare_inplace_alter_table_dict(
+ ha_alter_info, altered_table, table,
+ prebuilt->table, prebuilt->trx,
+ table_share->table_name.str,
+ flags, flags2,
+ heap, drop_index, n_drop_index,
+ drop_fk, n_drop_fk, add_fk, n_add_fk,
+ fts_doc_col_no, add_autoinc_col_no,
+ autoinc_col_max_value, add_fts_doc_id,
+ add_fts_doc_id_idx));
}
-/*******************************************************************//**
-Prepare to drop some indexes of a table.
-@return 0 or error number */
+
+/** Alter the table structure in-place with operations
+specified using Alter_inplace_info.
+The level of concurrency allowed during this operation depends
+on the return value from check_if_supported_inplace_alter().
+
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::prepare_drop_index(
-/*============================*/
- TABLE* in_table, /*!< in: Table where indexes are dropped */
- uint* key_num, /*!< in: Key nums to be dropped */
- uint num_of_keys) /*!< in: Number of keys to be dropped */
+bool
+ha_innobase::inplace_alter_table(
+/*=============================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
{
- trx_t* trx;
- int err = 0;
- uint n_key;
+ dberr_t error;
- DBUG_ENTER("ha_innobase::prepare_drop_index");
- ut_ad(table);
- ut_ad(key_num);
- ut_ad(num_of_keys);
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ DBUG_ENTER("inplace_alter_table");
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(false);
}
- update_thd();
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
- trx_search_latch_release_if_reserved(prebuilt->trx);
- trx = prebuilt->trx;
+ DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
- /* Test and mark all the indexes to be dropped */
+ if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ok_exit:
+ DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
+ DBUG_RETURN(false);
+ }
- row_mysql_lock_data_dictionary(trx);
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !innobase_need_rebuild(ha_alter_info)) {
+ goto ok_exit;
+ }
- /* Check that none of the indexes have previously been flagged
- for deletion. */
- {
- const dict_index_t* index
- = dict_table_get_first_index(prebuilt->table);
- do {
- ut_a(!index->to_be_dropped);
- index = dict_table_get_next_index(index);
- } while (index);
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ DBUG_ASSERT(ctx);
+ DBUG_ASSERT(ctx->trx);
+
+ if (prebuilt->table->ibd_file_missing
+ || dict_table_is_discarded(prebuilt->table)) {
+ goto all_done;
+ }
+
+ /* Read the clustered index of the table and build
+ indexes based on this information using temporary
+ files and merge sort. */
+ DBUG_EXECUTE_IF("innodb_OOM_inplace_alter",
+ error = DB_OUT_OF_MEMORY; goto oom;);
+ error = row_merge_build_indexes(
+ prebuilt->trx,
+ prebuilt->table, ctx->indexed_table,
+ ctx->online,
+ ctx->add, ctx->add_key_numbers, ctx->num_to_add,
+ altered_table, ctx->add_cols, ctx->col_map,
+ ctx->add_autoinc, ctx->sequence);
+#ifndef DBUG_OFF
+oom:
+#endif /* !DBUG_OFF */
+ if (error == DB_SUCCESS && ctx->online
+ && ctx->indexed_table != prebuilt->table) {
+ DEBUG_SYNC_C("row_log_table_apply1_before");
+ error = row_log_table_apply(
+ ctx->thr, prebuilt->table, altered_table);
}
- for (n_key = 0; n_key < num_of_keys; n_key++) {
- const KEY* key;
- dict_index_t* index;
+ DEBUG_SYNC_C("inplace_after_index_build");
- key = table->key_info + key_num[n_key];
- index = dict_table_get_index_on_name_and_min_id(
- prebuilt->table, key->name);
+ DBUG_EXECUTE_IF("create_index_fail",
+ error = DB_DUPLICATE_KEY;);
- if (!index) {
- sql_print_error("InnoDB could not find key n:o %u "
- "with name %s for table %s",
- key_num[n_key],
- key ? key->name : "NULL",
- prebuilt->table->name);
+ /* After an error, remove all those index definitions
+ from the dictionary which were defined. */
- err = HA_ERR_KEY_NOT_FOUND;
- goto func_exit;
+ switch (error) {
+ KEY* dup_key;
+ all_done:
+ case DB_SUCCESS:
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_PARTIAL_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
+ /* prebuilt->table->n_ref_count can be anything here,
+ given that we hold at most a shared lock on the table. */
+ goto ok_exit;
+ case DB_DUPLICATE_KEY:
+ if (prebuilt->trx->error_key_num == ULINT_UNDEFINED
+ || ha_alter_info->key_count == 0) {
+ /* This should be the hidden index on
+ FTS_DOC_ID, or there is no PRIMARY KEY in the
+ table. Either way, we should be seeing and
+ reporting a bogus duplicate key error. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(prebuilt->trx->error_key_num
+ < ha_alter_info->key_count);
+ dup_key = &ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num];
}
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ break;
+ case DB_ONLINE_LOG_TOO_BIG:
+ DBUG_ASSERT(ctx->online);
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ (prebuilt->trx->error_key_num == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num].name);
+ break;
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ (prebuilt->trx->error_key_num == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num].name);
+ break;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ }
- /* Refuse to drop the clustered index. It would be
- better to automatically generate a clustered index,
- but mysql_alter_table() will call this method only
- after ha_innobase::add_index(). */
+ /* prebuilt->table->n_ref_count can be anything here, given
+ that we hold at most a shared lock on the table. */
+ prebuilt->trx->error_info = NULL;
+ ctx->trx->error_state = DB_SUCCESS;
- if (dict_index_is_clust(index)) {
- my_error(ER_REQUIRES_PRIMARY_KEY, MYF(0));
- err = -1;
- goto func_exit;
- }
+ DBUG_RETURN(true);
+}
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = TRUE;
- rw_lock_x_unlock(dict_index_get_lock(index));
+/** Free the modification log for online table rebuild.
+@param table table that was being rebuilt online */
+static
+void
+innobase_online_rebuild_log_free(
+/*=============================*/
+ dict_table_t* table)
+{
+ dict_index_t* clust_index = dict_table_get_first_index(table);
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ rw_lock_x_lock(&clust_index->lock);
+
+ if (clust_index->online_log) {
+ ut_ad(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_CREATION);
+ clust_index->online_status = ONLINE_INDEX_COMPLETE;
+ row_log_free(clust_index->online_log);
+ DEBUG_SYNC_C("innodb_online_rebuild_log_free_aborted");
+ }
+
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+ rw_lock_x_unlock(&clust_index->lock);
+}
+
+/** Rollback a secondary index creation, drop the indexes with
+temporary index prefix
+@param prebuilt the prebuilt struct
+@param table_share the TABLE_SHARE
+@param trx the transaction
+*/
+static
+void
+innobase_rollback_sec_index(
+/*========================*/
+ row_prebuilt_t* prebuilt,
+ const TABLE_SHARE* table_share,
+ trx_t* trx)
+{
+ row_merge_drop_indexes(trx, prebuilt->table, FALSE);
+
+ /* Free the table->fts only if there is no FTS_DOC_ID
+ in the table */
+ if (prebuilt->table->fts
+ && !DICT_TF2_FLAG_IS_SET(prebuilt->table,
+ DICT_TF2_FTS_HAS_DOC_ID)
+ && !innobase_fulltext_exist(table_share)) {
+ fts_free(prebuilt->table);
}
+}
- /* If FOREIGN_KEY_CHECKS = 1 you may not drop an index defined
- for a foreign key constraint because InnoDB requires that both
- tables contain indexes for the constraint. Such index can
- be dropped only if FOREIGN_KEY_CHECKS is set to 0.
- Note that CREATE INDEX id ON table does a CREATE INDEX and
- DROP INDEX, and we can ignore here foreign keys because a
- new index for the foreign key has already been created.
+/** Roll back the changes made during prepare_inplace_alter_table()
+and inplace_alter_table() inside the storage engine. Note that the
+allowed level of concurrency during this operation will be the same as
+for inplace_alter_table() and thus might be higher than during
+prepare_inplace_alter_table(). (E.g. concurrent writes were blocked
+during prepare, but might not be during commit).
+
+@param ha_alter_info Data used during in-place alter.
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@retval true Failure
+@retval false Success
+*/
+inline
+bool
+rollback_inplace_alter_table(
+/*=========================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt)
+{
+ bool fail = false;
- We check for the foreign key constraints after marking the
- candidate indexes for deletion, because when we check for an
- equivalent foreign index we don't want to select an index that
- is later deleted. */
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
- if (trx->check_foreigns
- && thd_sql_command(user_thd) != SQLCOM_CREATE_INDEX) {
- dict_index_t* index;
+ DBUG_ENTER("rollback_inplace_alter_table");
- for (index = dict_table_get_first_index(prebuilt->table);
- index;
- index = dict_table_get_next_index(index)) {
- dict_foreign_t* foreign;
+ if (!ctx || !ctx->trx) {
+ /* If we have not started a transaction yet,
+ (almost) nothing has been or needs to be done. */
+ goto func_exit;
+ }
- if (!index->to_be_dropped) {
+ row_mysql_lock_data_dictionary(ctx->trx);
- continue;
+ if (prebuilt->table != ctx->indexed_table) {
+ dberr_t err;
+ ulint flags = ctx->indexed_table->flags;
+
+ /* DML threads can access ctx->indexed_table via the
+ online rebuild log. Free it first. */
+ innobase_online_rebuild_log_free(prebuilt->table);
+
+	/* Since the FTS index specific auxiliary tables have
+	not yet been registered with "table->fts" by fts_add_index(),
+	we will need to explicitly delete them here */
+ if (DICT_TF2_FLAG_IS_SET(ctx->indexed_table, DICT_TF2_FTS)) {
+
+ err = innobase_drop_fts_index_table(
+ ctx->indexed_table, ctx->trx);
+
+ if (err != DB_SUCCESS) {
+ my_error_innodb(
+ err, table_share->table_name.str,
+ flags);
+ fail = true;
}
+ }
- /* Check if the index is referenced. */
- foreign = dict_table_get_referenced_constraint(
- prebuilt->table, index);
+ /* Drop the table. */
+ dict_table_close(ctx->indexed_table, TRUE, FALSE);
- if (foreign) {
-index_needed:
- trx_set_detailed_error(
- trx,
- "Index needed in foreign key "
- "constraint");
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!ctx->indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
- trx->error_info = index;
+ err = row_merge_drop_table(ctx->trx, ctx->indexed_table);
- err = HA_ERR_DROP_INDEX_FK;
- break;
- } else {
- /* Check if this index references some
- other table */
- foreign = dict_table_get_foreign_constraint(
- prebuilt->table, index);
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ default:
+ my_error_innodb(err, table_share->table_name.str,
+ flags);
+ fail = true;
+ }
+ } else {
+ DBUG_ASSERT(!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX));
- if (foreign) {
- ut_a(foreign->foreign_index == index);
+ trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
- /* Search for an equivalent index that
- the foreign key constraint could use
- if this index were to be deleted. */
- if (!dict_foreign_find_equiv_index(
- foreign)) {
+ innobase_rollback_sec_index(prebuilt, table_share, ctx->trx);
+ }
- goto index_needed;
- }
- }
+ trx_commit_for_mysql(ctx->trx);
+ row_mysql_unlock_data_dictionary(ctx->trx);
+ trx_free_for_mysql(ctx->trx);
+
+
+func_exit:
+#ifndef DBUG_OFF
+ dict_index_t* clust_index = dict_table_get_first_index(
+ prebuilt->table);
+ DBUG_ASSERT(!clust_index->online_log);
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+#endif /* !DBUG_OFF */
+
+ if (ctx) {
+ if (ctx->num_to_add_fk) {
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+ dict_foreign_free(ctx->add_fk[i]);
}
}
- } else if (thd_sql_command(user_thd) == SQLCOM_CREATE_INDEX) {
- /* This is a drop of a foreign key constraint index that
- was created by MySQL when the constraint was added. MySQL
- does this when the user creates an index explicitly which
- can be used in place of the automatically generated index. */
- dict_index_t* index;
+ if (ctx->num_to_drop) {
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ /* Clear the to_be_dropped flags
+ in the data dictionary cache.
+ The flags may already have been cleared,
+ in case an error was detected in
+ commit_inplace_alter_table(). */
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+
+ index->to_be_dropped = 0;
+ }
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ }
+ }
+
+ trx_commit_for_mysql(prebuilt->trx);
+ srv_active_wake_master_thread();
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+ DBUG_RETURN(fail);
+}
+
+/** Drop a FOREIGN KEY constraint.
+@param table_share the TABLE_SHARE
+@param trx data dictionary transaction
+@param foreign the foreign key constraint, will be freed
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_drop_foreign(
+/*==================*/
+ const TABLE_SHARE* table_share,
+ trx_t* trx,
+ dict_foreign_t* foreign)
+{
+ DBUG_ENTER("innobase_drop_foreign");
+
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ /* Drop the constraint from the data dictionary. */
+ static const char sql[] =
+ "PROCEDURE DROP_FOREIGN_PROC () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_FOREIGN WHERE ID=:id;\n"
+ "DELETE FROM SYS_FOREIGN_COLS WHERE ID=:id;\n"
+ "END;\n";
+
+ dberr_t error;
+ pars_info_t* info;
+
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "id", foreign->id);
+
+ trx->op_info = "dropping foreign key constraint from dictionary";
+ error = que_eval_sql(info, sql, FALSE, trx);
+ trx->op_info = "";
+
+ DBUG_EXECUTE_IF("ib_drop_foreign_error",
+ error = DB_OUT_OF_FILE_SPACE;);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_share->table_name.str, 0);
+ trx->error_state = DB_SUCCESS;
+ DBUG_RETURN(true);
+ }
+
+ /* Drop the foreign key constraint from the data dictionary cache. */
+ dict_foreign_remove_from_cache(foreign);
+ DBUG_RETURN(false);
+}
+
+/** Rename a column.
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@param trx data dictionary transaction
+@param nth_col 0-based index of the column
+@param from old column name
+@param to new column name
+@param new_clustered whether the table has been rebuilt
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_rename_column(
+/*===================*/
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt,
+ trx_t* trx,
+ ulint nth_col,
+ const char* from,
+ const char* to,
+ bool new_clustered)
+{
+ pars_info_t* info;
+ dberr_t error;
- for (index = dict_table_get_first_index(prebuilt->table);
- index;
- index = dict_table_get_next_index(index)) {
- dict_foreign_t* foreign;
+ DBUG_ENTER("innobase_rename_column");
- if (!index->to_be_dropped) {
+ DBUG_ASSERT(trx_get_dict_operation(trx)
+ == new_clustered ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ if (new_clustered) {
+ goto rename_foreign;
+ }
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "tableid", prebuilt->table->id);
+ pars_info_add_int4_literal(info, "nth", nth_col);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ trx->op_info = "renaming column in SYS_COLUMNS";
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_COLUMNS_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS SET NAME=:new\n"
+ "WHERE TABLE_ID=:tableid AND NAME=:old\n"
+ "AND POS=:nth;\n"
+ "END;\n",
+ FALSE, trx);
+
+ DBUG_EXECUTE_IF("ib_rename_column_error",
+ error = DB_OUT_OF_FILE_SPACE;);
+
+ if (error != DB_SUCCESS) {
+err_exit:
+ my_error_innodb(error, table_share->table_name.str, 0);
+ trx->error_state = DB_SUCCESS;
+ trx->op_info = "";
+ DBUG_RETURN(true);
+ }
+
+ trx->op_info = "renaming column in SYS_FIELDS";
+
+ for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ if (strcmp(dict_index_get_nth_field(index, i)->name,
+ from)) {
continue;
}
- /* Check if this index references some other table */
- foreign = dict_table_get_foreign_constraint(
- prebuilt->table, index);
+ info = pars_info_create();
- if (foreign == NULL) {
+ pars_info_add_ull_literal(info, "indexid", index->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
- continue;
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FIELDS_PROC () IS\n"
+ "BEGIN\n"
+
+ "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
+ "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
+ "AND POS=:nth;\n"
+
+ /* Try again, in case there is a prefix_len
+ encoded in SYS_FIELDS.POS */
+
+ "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
+ "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
+ "AND POS>=65536*:nth AND POS<65536*(:nth+1);\n"
+
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
}
+ }
+ }
- ut_a(foreign->foreign_index == index);
+rename_foreign:
+ trx->op_info = "renaming column in SYS_FOREIGN_COLS";
- /* Search for an equivalent index that the
- foreign key constraint could use if this index
- were to be deleted. */
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ for (unsigned i = 0; i < foreign->n_fields; i++) {
+ if (strcmp(foreign->foreign_col_names[i], from)) {
+ continue;
+ }
- if (!dict_foreign_find_equiv_index(foreign)) {
- trx_set_detailed_error(
- trx,
- "Index needed in foreign key "
- "constraint");
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "id", foreign->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FOREIGN_F_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_FOREIGN_COLS\n"
+ "SET FOR_COL_NAME=:new\n"
+ "WHERE ID=:id AND POS=:nth\n"
+ "AND FOR_COL_NAME=:old;\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
+ }
+ }
+ }
- trx->error_info = foreign->foreign_index;
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ for (unsigned i = 0; i < foreign->n_fields; i++) {
+ if (strcmp(foreign->referenced_col_names[i], from)) {
+ continue;
+ }
- err = HA_ERR_DROP_INDEX_FK;
- break;
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "id", foreign->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FOREIGN_R_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_FOREIGN_COLS\n"
+ "SET REF_COL_NAME=:new\n"
+ "WHERE ID=:id AND POS=:nth\n"
+ "AND REF_COL_NAME=:old;\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
}
}
}
-func_exit:
- if (err) {
- /* Undo our changes since there was some sort of error. */
- dict_index_t* index
- = dict_table_get_first_index(prebuilt->table);
+ trx->op_info = "";
+ if (!new_clustered) {
+ /* Rename the column in the data dictionary cache. */
+ dict_mem_table_col_rename(prebuilt->table, nth_col, from, to);
+ }
+ DBUG_RETURN(false);
+}
+
+/** Rename columns.
+@param ha_alter_info Data used during in-place alter.
+@param new_clustered whether the table has been rebuilt
+@param table the TABLE
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@param trx data dictionary transaction
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_rename_columns(
+/*====================*/
+ Alter_inplace_info* ha_alter_info,
+ bool new_clustered,
+ const TABLE* table,
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt,
+ trx_t* trx)
+{
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ uint i = 0;
+
+ for (Field** fp = table->field; *fp; fp++, i++) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ continue;
+ }
+
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ if (innobase_rename_column(
+ table_share,
+ prebuilt, trx, i,
+ cf->field->field_name,
+ cf->field_name, new_clustered)) {
+ return(true);
+ }
+ goto processed_field;
+ }
+ }
- do {
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
- rw_lock_x_unlock(dict_index_get_lock(index));
- index = dict_table_get_next_index(index);
- } while (index);
+ ut_error;
+processed_field:
+ continue;
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
- row_mysql_unlock_data_dictionary(trx);
+ return(false);
+}
- DBUG_RETURN(err);
+/** Undo the in-memory addition of foreign key on table->foreign_list
+and table->referenced_list.
+@param ctx saved alter table context
+@param table the foreign table */
+static __attribute__((nonnull))
+void
+innobase_undo_add_fk(
+/*=================*/
+ ha_innobase_inplace_ctx* ctx,
+ dict_table_t* fk_table)
+{
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+ UT_LIST_REMOVE(
+ foreign_list,
+ fk_table->foreign_list,
+ ctx->add_fk[i]);
+
+ if (ctx->add_fk[i]->referenced_table) {
+ UT_LIST_REMOVE(
+ referenced_list,
+ ctx->add_fk[i]->referenced_table
+ ->referenced_list,
+ ctx->add_fk[i]);
+ }
+ }
}
-/*******************************************************************//**
-Drop the indexes that were passed to a successful prepare_drop_index().
-@return 0 or error number */
+/** Commit or rollback the changes made during
+prepare_inplace_alter_table() and inplace_alter_table() inside
+the storage engine. Note that the allowed level of concurrency
+during this operation will be the same as for
+inplace_alter_table() and thus might be higher than during
+prepare_inplace_alter_table(). (E.g. concurrent writes were
+blocked during prepare, but might not be during commit).
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+@param commit true => Commit, false => Rollback.
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::final_drop_index(
-/*==========================*/
- TABLE* iin_table) /*!< in: Table where indexes
- are dropped */
+bool
+ha_innobase::commit_inplace_alter_table(
+/*====================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ bool commit)
{
- dict_index_t* index; /*!< Index to be dropped */
- trx_t* trx; /*!< Transaction */
- int err;
-
- DBUG_ENTER("ha_innobase::final_drop_index");
- ut_ad(table);
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+ trx_t* trx;
+ trx_t* fk_trx = NULL;
+ int err = 0;
+ bool new_clustered;
+ dict_table_t* fk_table = NULL;
+ ulonglong max_autoinc;
+
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_ENTER("commit_inplace_alter_table");
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
+
+ if (!commit) {
+ /* A rollback is being requested. So far we may at
+ most have created some indexes. If any indexes were to
+ be dropped, they would actually be dropped in this
+ method if commit=true. */
+ DBUG_RETURN(rollback_inplace_alter_table(
+ ha_alter_info, table_share, prebuilt));
+ }
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ if (!altered_table->found_next_number_field) {
+ /* There is no AUTO_INCREMENT column in the table
+ after the ALTER operation. */
+ max_autoinc = 0;
+ } else if (ctx && ctx->add_autoinc != ULINT_UNDEFINED) {
+ /* An AUTO_INCREMENT column was added. Get the last
+ value from the sequence, which may be based on a
+ supplied AUTO_INCREMENT value. */
+ max_autoinc = ctx->sequence.last();
+ } else if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION)
+ && (ha_alter_info->create_info->used_fields
+ & HA_CREATE_USED_AUTO)) {
+ /* An AUTO_INCREMENT value was supplied, but the table
+ was not rebuilt. Get the user-supplied value. */
+ max_autoinc = ha_alter_info->create_info->auto_increment_value;
+ } else {
+ /* An AUTO_INCREMENT value was not specified.
+ Read the old counter value from the table. */
+ ut_ad(table->found_next_number_field);
+ dict_table_autoinc_lock(prebuilt->table);
+ max_autoinc = dict_table_autoinc_read(prebuilt->table);
+ dict_table_autoinc_unlock(prebuilt->table);
}
- update_thd();
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ DBUG_ASSERT(!ctx);
+ /* We may want to update table attributes. */
+ goto func_exit;
+ }
- trx_search_latch_release_if_reserved(prebuilt->trx);
trx_start_if_not_started_xa(prebuilt->trx);
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started_xa(trx);
-
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- /* Lock the table exclusively, to ensure that no active
- transaction depends on an index that is being dropped. */
- err = convert_error_code_to_mysql(
- row_merge_lock_table(prebuilt->trx, prebuilt->table, LOCK_X),
- prebuilt->table->flags, user_thd);
-
- /* Delete corresponding rows from the stats table.
- Marko advises not to edit both user tables and SYS_* tables in one
- trx, thus we use prebuilt->trx instead of trx. Because of this the
- drop from SYS_* and from the stats table cannot happen in one
- transaction and eventually if a crash occurs below, between
- trx_commit_for_mysql(trx); which drops the indexes from SYS_* and
- trx_commit_for_mysql(prebuilt->trx);
- then an orphaned rows will be left in the stats table. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
+ {
+ /* Exclusively lock the table, to ensure that no other
+ transaction is holding locks on the table while we
+ change the table definition. The MySQL meta-data lock
+ should normally guarantee that no conflicting locks
+ exist. However, FOREIGN KEY constraints checks and any
+ transactions collected during crash recovery could be
+ holding InnoDB locks only, not MySQL locks. */
+ dberr_t error = row_merge_lock_table(
+ prebuilt->trx, prebuilt->table, LOCK_X);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_share->table_name.str, 0);
+ DBUG_RETURN(true);
+ }
- if (index->to_be_dropped) {
+ DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
+ }
- enum db_err ret;
- char errstr[1024];
+ if (ctx) {
+ if (ctx->indexed_table != prebuilt->table) {
+ for (dict_index_t* index = dict_table_get_first_index(
+ ctx->indexed_table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ index->name);
+ DBUG_RETURN(true);
+ }
+ }
+ } else {
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ /* Report a duplicate key
+ error for the index that was
+ flagged corrupted, most likely
+ because a duplicate value was
+ inserted (directly or by
+ rollback) after
+ ha_innobase::inplace_alter_table()
+ completed. */
+ my_error(ER_DUP_UNKNOWN_IN_INDEX,
+ MYF(0), index->name + 1);
+ DBUG_RETURN(true);
+ }
+ }
+ }
+ }
- ret = dict_stats_delete_index_stats(
- index, prebuilt->trx,
- errstr, sizeof(errstr));
+ if (!ctx || !ctx->trx) {
+ /* Create a background transaction for the operations on
+ the data dictionary tables. */
+ trx = innobase_trx_allocate(user_thd);
- if (ret != DB_SUCCESS) {
- push_warning(user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT,
- errstr);
- }
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ new_clustered = false;
+ } else {
+ trx_dict_op_t op;
+
+ trx = ctx->trx;
+
+ new_clustered = ctx->indexed_table != prebuilt->table;
+
+ op = (new_clustered) ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX;
+
+ trx_start_for_ddl(trx, op);
+ }
+
+ if (new_clustered) {
+ if (prebuilt->table->fts) {
+ ut_ad(!prebuilt->table->fts->add_wq);
+ fts_optimize_remove_table(prebuilt->table);
+ }
+
+ if (ctx->indexed_table->fts) {
+ ut_ad(!ctx->indexed_table->fts->add_wq);
+ fts_optimize_remove_table(ctx->indexed_table);
}
}
+ /* Latch the InnoDB data dictionary exclusively so that no deadlocks
+ or lock waits can happen in it during the data dictionary operation. */
row_mysql_lock_data_dictionary(trx);
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
- if (UNIV_UNLIKELY(err)) {
+ /* Wait for background stats processing to stop using the
+ indexes that we are going to drop (if any). */
+ if (ctx) {
+ dict_stats_wait_bg_to_stop_using_tables(
+ prebuilt->table, ctx->indexed_table, trx);
+ }
- /* Unmark the indexes to be dropped. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index; index = dict_table_get_next_index(index)) {
+ /* Final phase of add foreign key processing */
+ if (ctx && ctx->num_to_add_fk > 0) {
+ ulint highest_id_so_far;
+ dberr_t error;
+
+ /* If it runs concurrently with create index or table
+ rebuild, we will need a separate trx to do the system
+ table change, since in the case of failure to rebuild/create
+ index, it will need to commit the trx that drops the newly
+ created table/index, while for FK, it needs to rollback
+ the metadata change */
+ if (new_clustered || ctx->num_to_add) {
+ fk_trx = innobase_trx_allocate(user_thd);
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
- rw_lock_x_unlock(dict_index_get_lock(index));
+ trx_start_for_ddl(fk_trx, TRX_DICT_OP_INDEX);
+
+ fk_trx->dict_operation_lock_mode =
+ trx->dict_operation_lock_mode;
+ } else {
+ fk_trx = trx;
}
- goto func_exit;
+ ut_ad(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY);
+
+ highest_id_so_far = dict_table_get_highest_foreign_id(
+ prebuilt->table);
+
+ highest_id_so_far++;
+
+ fk_table = ctx->indexed_table;
+
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+
+ /* Get the new dict_table_t */
+ if (new_clustered) {
+ ctx->add_fk[i]->foreign_table
+ = fk_table;
+ }
+
+ /* Add Foreign Key info to in-memory metadata */
+ UT_LIST_ADD_LAST(foreign_list,
+ fk_table->foreign_list,
+ ctx->add_fk[i]);
+
+ if (ctx->add_fk[i]->referenced_table) {
+ UT_LIST_ADD_LAST(
+ referenced_list,
+ ctx->add_fk[i]->referenced_table->referenced_list,
+ ctx->add_fk[i]);
+ }
+
+ if (!ctx->add_fk[i]->foreign_index) {
+ ctx->add_fk[i]->foreign_index
+ = dict_foreign_find_index(
+ fk_table,
+ ctx->add_fk[i]->foreign_col_names,
+ ctx->add_fk[i]->n_fields, NULL,
+ TRUE, FALSE);
+
+ ut_ad(ctx->add_fk[i]->foreign_index);
+
+ if (!innobase_check_fk_option(
+ ctx->add_fk[i])) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0),
+ table_share->table_name.str,
+ ctx->add_fk[i]->id);
+ goto undo_add_fk;
+ }
+ }
+
+ /* System table change */
+ error = dict_create_add_foreign_to_dictionary(
+ &highest_id_so_far, prebuilt->table,
+ ctx->add_fk[i], fk_trx);
+
+ DBUG_EXECUTE_IF(
+ "innodb_test_cannot_add_fk_system",
+ error = DB_ERROR;);
+
+ if (error != DB_SUCCESS) {
+ my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0),
+ ctx->add_fk[i]->id);
+ goto undo_add_fk;
+ }
+ }
+
+ /* Make sure the tables are moved to non-lru side of
+ dictionary list */
+ error = dict_load_foreigns(prebuilt->table->name, FALSE, TRUE);
+
+ if (error != DB_SUCCESS) {
+ my_error(ER_CANNOT_ADD_FOREIGN, MYF(0));
+
+undo_add_fk:
+ err = -1;
+
+ if (new_clustered) {
+ goto drop_new_clustered;
+ } else if (ctx->num_to_add > 0) {
+ ut_ad(trx != fk_trx);
+
+ innobase_rollback_sec_index(
+ prebuilt, table_share, trx);
+ innobase_undo_add_fk(ctx, fk_table);
+ trx_rollback_for_mysql(fk_trx);
+
+ goto trx_commit;
+ } else {
+ goto trx_rollback;
+ }
+ }
+ }
+
+ if (new_clustered) {
+ dberr_t error;
+ char* tmp_name;
+
+ /* Clear the to_be_dropped flag in the data dictionary. */
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->to_be_dropped);
+ index->to_be_dropped = 0;
+ }
+
+ /* We copied the table. Any indexes that were
+ requested to be dropped were not created in the copy
+ of the table. Apply any last bit of the rebuild log
+ and then rename the tables. */
+
+ if (ctx->online) {
+ DEBUG_SYNC_C("row_log_table_apply2_before");
+ error = row_log_table_apply(
+ ctx->thr, prebuilt->table, altered_table);
+
+ switch (error) {
+ KEY* dup_key;
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (prebuilt->trx->error_key_num
+ == ULINT_UNDEFINED) {
+ /* This should be the hidden index on
+ FTS_DOC_ID. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(
+ prebuilt->trx->error_key_num
+ < ha_alter_info->key_count);
+ dup_key = &ha_alter_info
+ ->key_info_buffer[
+ prebuilt->trx
+ ->error_key_num];
+ }
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ break;
+ case DB_ONLINE_LOG_TOO_BIG:
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ ha_alter_info->key_info_buffer[0]
+ .name);
+ break;
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ (prebuilt->trx->error_key_num
+ == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num]
+ .name);
+ break;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ }
+
+ if (error != DB_SUCCESS) {
+ err = -1;
+ goto drop_new_clustered;
+ }
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns(ha_alter_info, true, table,
+ table_share, prebuilt, trx)) {
+ err = -1;
+ goto drop_new_clustered;
+ }
+
+ /* A new clustered index was defined for the table
+ and there was no error at this point. We can
+ now rename the old table as a temporary table,
+ rename the new temporary table as the old
+ table and drop the old table. */
+ tmp_name = dict_mem_create_temporary_tablename(
+ ctx->heap, ctx->indexed_table->name,
+ ctx->indexed_table->id);
+
+ /* Rename table will reload and refresh the in-memory
+ foreign key constraint metadata. This is a rename operation
+ in preparing for dropping the old table. Set the table
+ to_be_dropped bit here, so to make sure DML foreign key
+ constraint check does not use the stale dict_foreign_t.
+ This is done because WL#6049 (FK MDL) has not been
+ implemented yet */
+ prebuilt->table->to_be_dropped = true;
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_rename",
+ DBUG_SUICIDE(););
+
+ /* The new table must inherit the flag from the
+ "parent" table. */
+ if (dict_table_is_discarded(prebuilt->table)) {
+ ctx->indexed_table->ibd_file_missing = true;
+ ctx->indexed_table->flags2 |= DICT_TF2_DISCARDED;
+ }
+
+ error = row_merge_rename_tables(
+ prebuilt->table, ctx->indexed_table,
+ tmp_name, trx);
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_rename",
+ DBUG_SUICIDE(););
+
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ ut_a(prebuilt->table->n_ref_count == 1);
+
+ switch (error) {
+ dict_table_t* old_table;
+ case DB_SUCCESS:
+ old_table = prebuilt->table;
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
+ DBUG_SUICIDE(););
+
+ trx_commit_for_mysql(prebuilt->trx);
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_commit",
+ DBUG_SUICIDE(););
+
+ if (fk_trx) {
+ ut_ad(fk_trx != trx);
+ trx_commit_for_mysql(fk_trx);
+ }
+
+ row_prebuilt_free(prebuilt, TRUE);
+ error = row_merge_drop_table(trx, old_table);
+ prebuilt = row_create_prebuilt(
+ ctx->indexed_table, table->s->reclength);
+ err = 0;
+ break;
+ case DB_TABLESPACE_EXISTS:
+ ut_a(ctx->indexed_table->n_ref_count == 1);
+ my_error(ER_TABLESPACE_EXISTS, MYF(0), tmp_name);
+ err = HA_ERR_TABLESPACE_EXISTS;
+ goto drop_new_clustered;
+ case DB_DUPLICATE_KEY:
+ ut_a(ctx->indexed_table->n_ref_count == 1);
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0), tmp_name);
+ err = HA_ERR_TABLE_EXIST;
+ goto drop_new_clustered;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ err = -1;
+
+drop_new_clustered:
+ /* Reset the to_be_dropped bit for the old table,
+ since we are aborting the operation and dropping
+ the new table due to some error conditions */
+ prebuilt->table->to_be_dropped = false;
+
+ /* Need to drop the added foreign key first */
+ if (fk_trx) {
+ ut_ad(fk_trx != trx);
+ innobase_undo_add_fk(ctx, fk_table);
+ trx_rollback_for_mysql(fk_trx);
+ }
+
+ dict_table_close(ctx->indexed_table, TRUE, FALSE);
+
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!ctx->indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
+
+ row_merge_drop_table(trx, ctx->indexed_table);
+ ctx->indexed_table = NULL;
+ goto trx_commit;
+ }
+ } else if (ctx) {
+ dberr_t error;
+
+ /* We altered the table in place. */
+ /* Lose the TEMP_INDEX_PREFIX. */
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name
+ == TEMP_INDEX_PREFIX);
+ index->name++;
+ error = row_merge_rename_index_to_add(
+ trx, prebuilt->table->id,
+ index->id);
+ if (error != DB_SUCCESS) {
+ sql_print_error(
+ "InnoDB: rename index to add: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ }
+ }
+
+ /* Drop any indexes that were requested to be dropped.
+ Rename them to TEMP_INDEX_PREFIX in the data
+ dictionary first. We do not bother to rename
+ index->name in the dictionary cache, because the index
+ is about to be freed after row_merge_drop_indexes_dict(). */
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+ DBUG_ASSERT(index->to_be_dropped);
+
+ error = row_merge_rename_index_to_drop(
+ trx, index->table->id, index->id);
+ if (error != DB_SUCCESS) {
+ sql_print_error(
+ "InnoDB: rename index to drop: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ }
+ }
+ }
+
+ if (err == 0
+ && (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)) {
+ DBUG_ASSERT(ctx->num_to_drop_fk > 0);
+ DBUG_ASSERT(ctx->num_to_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+ for (ulint i = 0; i < ctx->num_to_drop_fk; i++) {
+ DBUG_ASSERT(prebuilt->table
+ == ctx->drop_fk[i]->foreign_table);
+
+ if (innobase_drop_foreign(
+ table_share, trx, ctx->drop_fk[i])) {
+ err = -1;
+ }
+ }
+ }
+
+ if (err == 0 && !new_clustered
+ && (ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns(ha_alter_info, false, table,
+ table_share, prebuilt, trx)) {
+ err = -1;
}
- /* Drop indexes marked to be dropped */
+ if (err == 0) {
+ if (fk_trx && fk_trx != trx) {
+ /* This needs to be placed before "trx_commit" marker,
+ since anyone called "goto trx_commit" has committed
+ or rolled back fk_trx before jumping here */
+ trx_commit_for_mysql(fk_trx);
+ }
+trx_commit:
+ trx_commit_for_mysql(trx);
+ } else {
+trx_rollback:
+ /* undo the addition of foreign key */
+ if (fk_trx) {
+ innobase_undo_add_fk(ctx, fk_table);
- index = dict_table_get_first_index(prebuilt->table);
+ if (fk_trx != trx) {
+ trx_rollback_for_mysql(fk_trx);
+ }
+ }
- while (index) {
- dict_index_t* next_index;
+ trx_rollback_for_mysql(trx);
+
+ /* If there are newly added secondary indexes, above
+ rollback will revert the rename operation and put the
+ new indexes with the temp index prefix, we can drop
+ them here */
+ if (ctx && !new_clustered) {
+ ulint i;
+
+ /* Need to drop the in-memory dict_index_t first
+ to avoid dict_table_check_for_dup_indexes()
+ assertion in row_merge_drop_indexes() in the case
+ of add and drop the same index */
+ for (i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ dict_index_remove_from_cache(
+ prebuilt->table, index);
+ }
- next_index = dict_table_get_next_index(index);
+ if (ctx->num_to_add) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ row_merge_drop_indexes(trx, prebuilt->table,
+ FALSE);
+ trx_commit_for_mysql(trx);
+ }
- if (index->to_be_dropped) {
- row_merge_drop_index(index, prebuilt->table, trx);
+ for (i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ index->to_be_dropped = false;
+ }
}
+ }
- index = next_index;
+ /* Flush the log to reduce probability that the .frm files and
+ the InnoDB data dictionary get out-of-sync if the user runs
+ with innodb_flush_log_at_trx_commit = 0 */
+
+ log_buffer_flush_to_disk();
+
+ if (new_clustered) {
+ innobase_online_rebuild_log_free(prebuilt->table);
}
- /* Check that all flagged indexes were dropped. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index; index = dict_table_get_next_index(index)) {
- ut_a(!index->to_be_dropped);
+ if (err == 0 && ctx) {
+ /* The changes were successfully performed. */
+ bool add_fts = false;
+
+ /* Rebuild the index translation table.
+ This should only be needed when !new_clustered. */
+ share->idx_trans_tbl.index_count = 0;
+
+ /* Publish the created fulltext index, if any.
+ Note that a fulltext index can be created without
+ creating the clustered index, if there already exists
+ a suitable FTS_DOC_ID column. If not, one will be
+ created, implying new_clustered */
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS);
+ fts_add_index(index, prebuilt->table);
+ add_fts = true;
+ }
+ }
+
+ if (!new_clustered && ha_alter_info->index_drop_count) {
+
+ /* Really drop the indexes that were dropped.
+ The transaction had to be committed first
+ (after renaming the indexes), so that in the
+ event of a crash, crash recovery will drop the
+ indexes, because it drops all indexes whose
+ names start with TEMP_INDEX_PREFIX. Once we
+ have started dropping an index tree, there is
+ no way to roll it back. */
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+ DBUG_ASSERT(index->to_be_dropped);
+
+ /* Replace the indexes in foreign key
+ constraints if needed. */
+
+ dict_foreign_replace_index(
+ prebuilt->table, index, prebuilt->trx);
+
+ /* Mark the index dropped
+ in the data dictionary cache. */
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->page = FIL_NULL;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ }
+
+ row_merge_drop_indexes_dict(trx, prebuilt->table->id);
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS
+ || (index->type
+ & DICT_CORRUPT));
+ DBUG_ASSERT(prebuilt->table->fts);
+ fts_drop_index(
+ prebuilt->table, index, trx);
+ }
+
+ dict_index_remove_from_cache(
+ prebuilt->table, index);
+ }
+
+ trx_commit_for_mysql(trx);
+ }
+
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ALL_COMPLETE));
+ DBUG_ASSERT(new_clustered == !prebuilt->trx);
+
+ if (add_fts) {
+ fts_optimize_add_table(prebuilt->table);
+ }
}
- /* We will need to rebuild index translation table. Set
- valid index entry count in the translation table to zero */
- share->idx_trans_tbl.index_count = 0;
+ if (!prebuilt->trx) {
+ /* We created a new clustered index and committed the
+ user transaction already, so that we were able to
+ drop the old table. */
+ update_thd();
+ prebuilt->trx->will_lock++;
-func_exit:
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit",
+ DBUG_SUICIDE(););
+
+ trx_start_if_not_started_xa(prebuilt->trx);
+ }
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ABORTED_OK));
ut_a(fts_check_cached_index(prebuilt->table));
+ row_mysql_unlock_data_dictionary(trx);
+ if (fk_trx && fk_trx != trx) {
+ fk_trx->dict_operation_lock_mode = 0;
+ trx_free_for_mysql(fk_trx);
+ }
+ trx_free_for_mysql(trx);
+
+ if (ctx && trx == ctx->trx) {
+ ctx->trx = NULL;
+ }
+
+ if (err == 0) {
+ /* Delete corresponding rows from the stats table. We update
+ the statistics in a separate transaction from trx, because
+ lock waits are not allowed in a data dictionary transaction.
+ (Lock waits are possible on the statistics table, because it
+ is directly accessible by users, not covered by the
+ dict_operation_lock.)
+
+ Because the data dictionary changes were already committed,
+ orphaned rows may be left in the statistics table if the
+ system crashes. */
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY* key
+ = ha_alter_info->index_drop_buffer[i];
+ dberr_t ret;
+ char errstr[1024];
+
+ ret = dict_stats_drop_index(
+ prebuilt->table->name, key->name,
+ errstr, sizeof(errstr));
+
+ if (ret != DB_SUCCESS) {
+ push_warning(user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_LOCK_WAIT_TIMEOUT,
+ errstr);
+ }
+ }
+
+ if (ctx && !dict_table_is_discarded(prebuilt->table)) {
+ bool stats_init_called = false;
+
+ for (uint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+
+ if (!(index->type & DICT_FTS)) {
+
+ if (!stats_init_called) {
+ innobase_copy_frm_flags_from_table_share(
+ index->table,
+ altered_table->s);
+
+ dict_stats_init(index->table);
+
+ stats_init_called = true;
+ }
+
+ dict_stats_update_for_index(index);
+ }
+ }
+ }
+ }
- trx_commit_for_mysql(trx);
trx_commit_for_mysql(prebuilt->trx);
- row_mysql_unlock_data_dictionary(trx);
/* Flush the log to reduce probability that the .frm files and
the InnoDB data dictionary get out-of-sync if the user runs
@@ -1912,12 +5213,106 @@ func_exit:
log_buffer_flush_to_disk();
- trx_free_for_mysql(trx);
-
/* Tell the InnoDB server that there might be work for
utility threads: */
srv_active_wake_master_thread();
- DBUG_RETURN(err);
+func_exit:
+
+ if (err == 0 && altered_table->found_next_number_field != 0) {
+ dict_table_autoinc_lock(prebuilt->table);
+ dict_table_autoinc_initialize(prebuilt->table, max_autoinc);
+ dict_table_autoinc_unlock(prebuilt->table);
+ }
+
+#ifndef DBUG_OFF
+ dict_index_t* clust_index = dict_table_get_first_index(
+ prebuilt->table);
+ DBUG_ASSERT(!clust_index->online_log);
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+#endif /* !DBUG_OFF */
+
+#ifdef UNIV_DEBUG
+ for (dict_index_t* index = dict_table_get_first_index(
+ prebuilt->table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ ut_ad(!index->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
+
+ if (err == 0) {
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+
+#ifdef UNIV_DDL_DEBUG
+ /* Invoke CHECK TABLE atomically after a successful
+ ALTER TABLE. */
+ TABLE* old_table = table;
+ table = altered_table;
+ ut_a(check(user_thd, 0) == HA_ADMIN_OK);
+ table = old_table;
+#endif /* UNIV_DDL_DEBUG */
+ }
+
+ DBUG_RETURN(err != 0);
+}
+
+/**
+@param thd - the session
+@param start_value - the lower bound
+@param max_value - the upper bound (inclusive) */
+ib_sequence_t::ib_sequence_t(
+ THD* thd,
+ ulonglong start_value,
+ ulonglong max_value)
+ :
+ m_max_value(max_value),
+ m_increment(0),
+ m_offset(0),
+ m_next_value(start_value),
+ m_eof(false)
+{
+ if (thd != 0 && m_max_value > 0) {
+
+ thd_get_autoinc(thd, &m_offset, &m_increment);
+
+ if (m_increment > 1 || m_offset > 1) {
+
+ /* If there is an offset or increment specified
+ then we need to work out the exact next value. */
+
+ m_next_value = innobase_next_autoinc(
+ start_value, 1,
+ m_increment, m_offset, m_max_value);
+
+ } else if (start_value == 0) {
+ /* The next value can never be 0. */
+ m_next_value = 1;
+ }
+ } else {
+ m_eof = true;
+ }
+}
+
+/**
+Postfix increment
+@return the next value to insert */
+ulonglong
+ib_sequence_t::operator++(int) UNIV_NOTHROW
+{
+ ulonglong current = m_next_value;
+
+ ut_ad(!m_eof);
+ ut_ad(m_max_value > 0);
+
+ m_next_value = innobase_next_autoinc(
+ current, 1, m_increment, m_offset, m_max_value);
+
+ if (m_next_value == m_max_value && current == m_next_value) {
+ m_eof = true;
+ }
+
+ return(current);
}
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 882f5040a38..4f84f477b3a 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +39,7 @@ Created July 18, 2007 Vasil Dimov
#include "btr0types.h"
#include "buf0buddy.h" /* for i_s_cmpmem */
#include "buf0buf.h" /* for buf_pool */
+#include "dict0dict.h" /* for dict_table_stats_lock() */
#include "dict0load.h" /* for file sys_tables related info. */
#include "dict0mem.h"
#include "dict0types.h"
@@ -57,14 +58,12 @@ Created July 18, 2007 Vasil Dimov
/** structure associates a name string with a file page type and/or buffer
page state. */
-struct buffer_page_desc_str_struct{
+struct buf_page_desc_t{
const char* type_str; /*!< String explain the page
type/state */
ulint type_value; /*!< Page type or page state */
};
-typedef struct buffer_page_desc_str_struct buf_page_desc_str_t;
-
/** Any states greater than FIL_PAGE_TYPE_LAST would be treated as unknown. */
#define I_S_PAGE_TYPE_UNKNOWN (FIL_PAGE_TYPE_LAST + 1)
@@ -73,7 +72,7 @@ in i_s_page_type[] array */
#define I_S_PAGE_TYPE_INDEX 1
/** Name string for File Page Types */
-static buf_page_desc_str_t i_s_page_type[] = {
+static buf_page_desc_t i_s_page_type[] = {
{"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED},
{"INDEX", FIL_PAGE_INDEX},
{"UNDO_LOG", FIL_PAGE_UNDO_LOG},
@@ -98,7 +97,7 @@ static buf_page_desc_str_t i_s_page_type[] = {
/** This structure defines information we will fetch from pages
currently cached in the buffer pool. It will be used to populate
table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */
-struct buffer_page_info_struct{
+struct buf_page_info_t{
ulint block_id; /*!< Buffer Pool block ID */
unsigned space_id:32; /*!< Tablespace ID */
unsigned page_num:32; /*!< Page number/offset */
@@ -131,8 +130,6 @@ struct buffer_page_info_struct{
index_id_t index_id; /*!< Index ID if a index page */
};
-typedef struct buffer_page_info_struct buf_page_info_t;
-
/** maximum number of buffer page info we would cache. */
#define MAX_BUF_INFO_CACHED 10000
@@ -282,6 +279,43 @@ field_store_string(
}
/*******************************************************************//**
+Store the name of an index in a MYSQL_TYPE_VARCHAR field.
+Handles the names of incomplete secondary indexes.
+@return 0 on success */
+static
+int
+field_store_index_name(
+/*===================*/
+ Field* field, /*!< in/out: target field for
+ storage */
+ const char* index_name) /*!< in: NUL-terminated utf-8
+ index name, possibly starting with
+ TEMP_INDEX_PREFIX */
+{
+ int ret;
+
+ ut_ad(index_name != NULL);
+ ut_ad(field->real_type() == MYSQL_TYPE_VARCHAR);
+
+ /* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert
+ it to something else. */
+ if (index_name[0] == TEMP_INDEX_PREFIX) {
+ char buf[NAME_LEN + 1];
+ buf[0] = '?';
+ memcpy(buf + 1, index_name + 1, strlen(index_name));
+ ret = field->store(buf, strlen(buf),
+ system_charset_info);
+ } else {
+ ret = field->store(index_name, strlen(index_name),
+ system_charset_info);
+ }
+
+ field->set_notnull();
+
+ return(ret);
+}
+
+/*******************************************************************//**
Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
If the value is ULINT_UNDEFINED then the field it set to NULL.
@return 0 on success */
@@ -713,7 +747,7 @@ static struct st_mysql_information_schema i_s_info =
MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_trx =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_trx =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -757,9 +791,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_trx =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_locks */
@@ -923,16 +961,9 @@ fill_innodb_locks_from_cache(
/* lock_index */
if (row->lock_index != NULL) {
-
- bufend = innobase_convert_name(buf, sizeof(buf),
- row->lock_index,
- strlen(row->lock_index),
- thd, FALSE);
- OK(fields[IDX_LOCK_INDEX]->store(buf, bufend - buf,
- system_charset_info));
- fields[IDX_LOCK_INDEX]->set_notnull();
+ OK(field_store_index_name(fields[IDX_LOCK_INDEX],
+ row->lock_index));
} else {
-
fields[IDX_LOCK_INDEX]->set_null();
}
@@ -979,7 +1010,7 @@ innodb_locks_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_locks =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_locks =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1023,9 +1054,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_locks =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_lock_waits */
@@ -1162,7 +1197,7 @@ innodb_lock_waits_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_lock_waits =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1206,9 +1241,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -1495,7 +1534,7 @@ i_s_cmp_reset_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1539,12 +1578,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_reset =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1589,9 +1632,371 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+/* Fields of the dynamic tables
+information_schema.innodb_cmp_per_index and
+information_schema.innodb_cmp_per_index_reset. */
+static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] =
+{
+#define IDX_DATABASE_NAME 0
+ {STRUCT_FLD(field_name, "database_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_TABLE_NAME 1
+ {STRUCT_FLD(field_name, "table_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_INDEX_NAME 2
+ {STRUCT_FLD(field_name, "index_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_OPS 3
+ {STRUCT_FLD(field_name, "compress_ops"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_OPS_OK 4
+ {STRUCT_FLD(field_name, "compress_ops_ok"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_TIME 5
+ {STRUCT_FLD(field_name, "compress_time"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_UNCOMPRESS_OPS 6
+ {STRUCT_FLD(field_name, "uncompress_ops"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_UNCOMPRESS_TIME 7
+ {STRUCT_FLD(field_name, "uncompress_time"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+ END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Fill the dynamic table
+information_schema.innodb_cmp_per_index or
+information_schema.innodb_cmp_per_index_reset.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_fill_low(
+/*=======================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* , /*!< in: condition (ignored) */
+ ibool reset) /*!< in: TRUE=reset cumulated counts */
+{
+ TABLE* table = tables->table;
+ Field** fields = table->field;
+ int status = 0;
+
+ DBUG_ENTER("i_s_cmp_per_index_fill_low");
+
+ /* deny access to non-superusers */
+ if (check_global_access(thd, PROCESS_ACL)) {
+
+ DBUG_RETURN(0);
+ }
+
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+ /* Create a snapshot of the stats so we do not bump into lock
+ order violations with dict_sys->mutex below. */
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index_t snap (page_zip_stat_per_index);
+ mutex_exit(&page_zip_stat_per_index_mutex);
+
+ mutex_enter(&dict_sys->mutex);
+
+ page_zip_stat_per_index_t::iterator iter;
+ ulint i;
+
+ for (iter = snap.begin(), i = 0; iter != snap.end(); iter++, i++) {
+
+ char name[192];
+ dict_index_t* index = dict_index_find_on_id_low(iter->first);
+
+ if (index != NULL) {
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(index->table_name,
+ db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ field_store_string(fields[IDX_DATABASE_NAME], db_utf8);
+ field_store_string(fields[IDX_TABLE_NAME], table_utf8);
+ field_store_index_name(fields[IDX_INDEX_NAME],
+ index->name);
+ } else {
+ /* index not found */
+ ut_snprintf(name, sizeof(name),
+ "index_id:" IB_ID_FMT, iter->first);
+ field_store_string(fields[IDX_DATABASE_NAME],
+ "unknown");
+ field_store_string(fields[IDX_TABLE_NAME],
+ "unknown");
+ field_store_string(fields[IDX_INDEX_NAME],
+ name);
+ }
+
+ fields[IDX_COMPRESS_OPS]->store(
+ iter->second.compressed);
+
+ fields[IDX_COMPRESS_OPS_OK]->store(
+ iter->second.compressed_ok);
+
+ fields[IDX_COMPRESS_TIME]->store(
+ (long) (iter->second.compressed_usec / 1000000));
+
+ fields[IDX_UNCOMPRESS_OPS]->store(
+ iter->second.decompressed);
+
+ fields[IDX_UNCOMPRESS_TIME]->store(
+ (long) (iter->second.decompressed_usec / 1000000));
+
+ if (schema_table_store_record(thd, table)) {
+ status = 1;
+ break;
+ }
+
+ /* Release and reacquire the dict mutex to allow other
+ threads to proceed. This could eventually result in the
+ contents of INFORMATION_SCHEMA.innodb_cmp_per_index being
+ inconsistent, but it is an acceptable compromise. */
+ if (i % 1000 == 0) {
+ mutex_exit(&dict_sys->mutex);
+ mutex_enter(&dict_sys->mutex);
+ }
+ }
+
+ mutex_exit(&dict_sys->mutex);
+
+ if (reset) {
+ page_zip_reset_stat_per_index();
+ }
+
+ DBUG_RETURN(status);
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp_per_index.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_fill(
+/*===================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* cond) /*!< in: condition (ignored) */
+{
+ return(i_s_cmp_per_index_fill_low(thd, tables, cond, FALSE));
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp_per_index_reset.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_reset_fill(
+/*=========================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* cond) /*!< in: condition (ignored) */
+{
+ return(i_s_cmp_per_index_fill_low(thd, tables, cond, TRUE));
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp_per_index.
+@return 0 on success */
+static
+int
+i_s_cmp_per_index_init(
+/*===================*/
+ void* p) /*!< in/out: table schema object */
+{
+ DBUG_ENTER("i_s_cmp_init");
+ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = i_s_cmp_per_index_fields_info;
+ schema->fill_table = i_s_cmp_per_index_fill;
+
+ DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp_per_index_reset.
+@return 0 on success */
+static
+int
+i_s_cmp_per_index_reset_init(
+/*=========================*/
+ void* p) /*!< in/out: table schema object */
+{
+ DBUG_ENTER("i_s_cmp_reset_init");
+ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = i_s_cmp_per_index_fields_info;
+ schema->fill_table = i_s_cmp_per_index_reset_fill;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_CMP_PER_INDEX"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, plugin_author),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index)"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, i_s_cmp_per_index_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index_reset =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_CMP_PER_INDEX_RESET"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, plugin_author),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index);"
+ " reset cumulated counts"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, i_s_cmp_per_index_reset_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table information_schema.innodb_cmpmem. */
@@ -1695,8 +2100,8 @@ i_s_cmpmem_fill_low(
table->field[3]->store(UNIV_LIKELY(x < BUF_BUDDY_SIZES)
? UT_LIST_GET_LEN(buf_pool->zip_free[x])
: 0);
- table->field[4]->store((longlong)
- buddy_stat->relocated, true);
+ table->field[4]->store(
+ (longlong) buddy_stat->relocated, true);
table->field[5]->store(
(ulong) (buddy_stat->relocated_usec / 1000000));
@@ -1786,7 +2191,7 @@ i_s_cmpmem_reset_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmpmem =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1830,12 +2235,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmpmem_reset =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1880,9 +2289,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_metrics */
@@ -1935,7 +2348,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] =
#define METRIC_AVG_VALUE_START 5
{STRUCT_FLD(field_name, "AVG_COUNT"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
@@ -1971,7 +2384,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] =
#define METRIC_AVG_VALUE_RESET 9
{STRUCT_FLD(field_name, "AVG_COUNT_RESET"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
@@ -2360,7 +2773,7 @@ innodb_metrics_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_metrics =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_metrics =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2404,9 +2817,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_metrics =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_ft_default_stopword */
static ST_FIELD_INFO i_s_stopword_fields_info[] =
@@ -2473,7 +2890,7 @@ i_s_stopword_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_default_stopword =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2481,7 +2898,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_stopword_fields_info),
/* plugin name */
/* const char* */
@@ -2517,9 +2934,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED
@@ -2571,8 +2992,8 @@ i_s_fts_deleted_generic_fill(
deleted = fts_doc_ids_create();
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -2603,7 +3024,7 @@ i_s_fts_deleted_generic_fill(
fts_doc_ids_free(deleted);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -2642,7 +3063,7 @@ i_s_fts_deleted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_deleted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2650,7 +3071,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2686,9 +3107,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -2725,7 +3150,7 @@ i_s_fts_being_deleted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_being_deleted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2733,7 +3158,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2769,9 +3194,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -2803,8 +3232,8 @@ i_s_fts_inserted_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -2835,7 +3264,7 @@ i_s_fts_inserted_fill(
fts_doc_ids_free(inserted);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -2858,7 +3287,7 @@ i_s_fts_inserted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_inserted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2866,7 +3295,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2902,9 +3331,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED and
@@ -3078,8 +3511,8 @@ i_s_fts_index_cache_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3098,7 +3531,7 @@ i_s_fts_index_cache_fill(
i_s_fts_index_cache_fill_one_index(index_cache, thd, tables);
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3121,7 +3554,7 @@ i_s_fts_index_cache_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_cache =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3129,7 +3562,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_index_fields_info),
/* plugin name */
/* const char* */
@@ -3165,9 +3598,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -3276,6 +3713,7 @@ i_s_fts_index_table_fill_one_index(
ulint num_row_fill;
DBUG_ENTER("i_s_fts_index_cache_fill_one_index");
+ DBUG_ASSERT(!dict_index_is_online_ddl(index));
heap = mem_heap_create(1024);
@@ -3384,8 +3822,8 @@ i_s_fts_index_table_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3398,7 +3836,7 @@ i_s_fts_index_table_fill(
}
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3421,7 +3859,7 @@ i_s_fts_index_table_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_table =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3429,7 +3867,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_index_fields_info),
/* plugin name */
/* const char* */
@@ -3465,9 +3903,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG */
@@ -3541,8 +3983,8 @@ i_s_fts_config_fill(
fields = table->field;
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3556,6 +3998,7 @@ i_s_fts_config_fill(
if (!ib_vector_is_empty(user_table->fts->indexes)) {
index = (dict_index_t*) ib_vector_getp_const(
user_table->fts->indexes, 0);
+ DBUG_ASSERT(!dict_index_is_online_ddl(index));
}
while (fts_config_key[i]) {
@@ -3567,10 +4010,10 @@ i_s_fts_config_fill(
value.f_str = str;
- if (strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0
- && index) {
+ if (index
+ && strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0) {
key_name = fts_config_create_index_param_name(
- fts_config_key[i], index);
+ fts_config_key[i], index);
allocated = TRUE;
} else {
key_name = (char*) fts_config_key[i];
@@ -3597,7 +4040,7 @@ i_s_fts_config_fill(
trx_free_for_background(trx);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3620,7 +4063,7 @@ i_s_fts_config_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_config =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3628,7 +4071,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_config_fields_info),
/* plugin name */
/* const char* */
@@ -3664,9 +4107,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INNODB_BUFFER_POOL_STATS. */
@@ -3782,7 +4229,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_YOUNG_RATE 12
{STRUCT_FLD(field_name, "PAGES_MADE_YOUNG_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3791,7 +4238,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE 13
{STRUCT_FLD(field_name, "PAGES_MADE_NOT_YOUNG_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3827,7 +4274,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_READ_RATE 17
{STRUCT_FLD(field_name, "PAGES_READ_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3836,7 +4283,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_CREATE_RATE 18
{STRUCT_FLD(field_name, "PAGES_CREATE_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3845,7 +4292,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_WRITTEN_RATE 19
{STRUCT_FLD(field_name, "PAGES_WRITTEN_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3908,7 +4355,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_READ_AHEAD_RATE 26
{STRUCT_FLD(field_name, "READ_AHEAD_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3917,7 +4364,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_READ_AHEAD_EVICT_RATE 27
{STRUCT_FLD(field_name, "READ_AHEAD_EVICTED_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -4023,11 +4470,13 @@ i_s_innodb_stats_fill(
OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(info->n_pages_written));
+ OK(fields[IDX_BUF_STATS_GET]->store(info->n_page_gets));
+
OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store(info->pages_read_rate));
- OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store(info->pages_created_rate));
+ OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store(info->pages_created_rate));
- OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(info->pages_written_rate));
+ OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store(info->pages_written_rate));
if (info->n_page_get_delta) {
OK(fields[IDX_BUF_STATS_HIT_RATE]->store(
@@ -4137,7 +4586,7 @@ i_s_innodb_buffer_pool_stats_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_stats =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -4181,9 +4630,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INNODB_BUFFER_POOL_PAGE. */
@@ -4384,9 +4837,8 @@ i_s_innodb_buffer_page_fill(
TABLE_LIST* tables, /*!< in/out: tables to fill */
const buf_page_info_t* info_array, /*!< in: array cached page
info */
- ulint num_page, /*!< in: number of page info
- cached */
- mem_heap_t* heap) /*!< in: temp heap memory */
+ ulint num_page) /*!< in: number of page info
+ cached */
{
TABLE* table;
Field** fields;
@@ -4400,15 +4852,13 @@ i_s_innodb_buffer_page_fill(
/* Iterate through the cached array and fill the I_S table rows */
for (ulint i = 0; i < num_page; i++) {
const buf_page_info_t* page_info;
- const char* table_name;
- const char* index_name;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ const char* table_name_end = NULL;
const char* state_str;
enum buf_page_state state;
page_info = info_array + i;
- table_name = NULL;
- index_name = NULL;
state_str = NULL;
OK(fields[IDX_BUFFER_POOL_ID]->store(page_info->pool_id));
@@ -4446,6 +4896,10 @@ i_s_innodb_buffer_page_fill(
OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store(
page_info->access_time));
+ fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null();
+
+ fields[IDX_BUFFER_PAGE_INDEX_NAME]->set_null();
+
/* If this is an index page, fetch the index name
and table name */
if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
@@ -4455,32 +4909,28 @@ i_s_innodb_buffer_page_fill(
index = dict_index_get_if_in_cache_low(
page_info->index_id);
- /* Copy the index/table name under mutex. We
- do not want to hold the InnoDB mutex while
- filling the IS table */
if (index) {
- const char* name_ptr = index->name;
-
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- index_name = mem_heap_strdup(heap, name_ptr);
-
- table_name = mem_heap_strdup(heap,
- index->table_name);
+ table_name_end = innobase_convert_name(
+ table_name, sizeof(table_name),
+ index->table_name,
+ strlen(index->table_name),
+ thd, TRUE);
+
+ OK(fields[IDX_BUFFER_PAGE_TABLE_NAME]->store(
+ table_name,
+ table_name_end - table_name,
+ system_charset_info));
+ fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull();
+
+ OK(field_store_index_name(
+ fields[IDX_BUFFER_PAGE_INDEX_NAME],
+ index->name));
}
mutex_exit(&dict_sys->mutex);
}
- OK(field_store_string(
- fields[IDX_BUFFER_PAGE_TABLE_NAME], table_name));
-
- OK(field_store_string(
- fields[IDX_BUFFER_PAGE_INDEX_NAME], index_name));
-
OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store(
page_info->num_recs));
@@ -4593,7 +5043,7 @@ i_s_innodb_set_page_type(
/* Encountered an unknown page type */
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
} else {
- /* Make sure we get the righ index into the
+ /* Make sure we get the right index into the
i_s_page_type[] array */
ut_a(page_type == i_s_page_type[page_type].type_value);
@@ -4751,7 +5201,7 @@ i_s_innodb_fill_buffer_pool(
just collected from the buffer chunk scan */
status = i_s_innodb_buffer_page_fill(
thd, tables, info_buffer,
- num_page, heap);
+ num_page);
/* If something goes wrong, break and return */
if (status) {
@@ -4830,7 +5280,7 @@ i_s_innodb_buffer_page_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -4874,9 +5324,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
@@ -5094,13 +5548,11 @@ i_s_innodb_buf_page_lru_fill(
/* Iterate through the cached array and fill the I_S table rows */
for (ulint i = 0; i < num_page; i++) {
const buf_page_info_t* page_info;
- const char* table_name;
- const char* index_name;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ const char* table_name_end = NULL;
const char* state_str;
enum buf_page_state state;
- table_name = NULL;
- index_name = NULL;
state_str = NULL;
page_info = info_array + i;
@@ -5140,6 +5592,10 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store(
page_info->access_time));
+ fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null();
+
+ fields[IDX_BUF_LRU_PAGE_INDEX_NAME]->set_null();
+
/* If this is an index page, fetch the index name
and table name */
if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
@@ -5149,30 +5605,28 @@ i_s_innodb_buf_page_lru_fill(
index = dict_index_get_if_in_cache_low(
page_info->index_id);
- /* Copy the index/table name under mutex. We
- do not want to hold the InnoDB mutex while
- filling the IS table */
if (index) {
- const char* name_ptr = index->name;
-
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- index_name = mem_heap_strdup(heap, name_ptr);
- table_name = mem_heap_strdup(heap,
- index->table_name);
+ table_name_end = innobase_convert_name(
+ table_name, sizeof(table_name),
+ index->table_name,
+ strlen(index->table_name),
+ thd, TRUE);
+
+ OK(fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->store(
+ table_name,
+ table_name_end - table_name,
+ system_charset_info));
+ fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull();
+
+ OK(field_store_index_name(
+ fields[IDX_BUF_LRU_PAGE_INDEX_NAME],
+ index->name));
}
mutex_exit(&dict_sys->mutex);
}
- OK(field_store_string(
- fields[IDX_BUF_LRU_PAGE_TABLE_NAME], table_name));
-
- OK(field_store_string(
- fields[IDX_BUF_LRU_PAGE_INDEX_NAME], index_name));
OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store(
page_info->num_recs));
@@ -5372,7 +5826,7 @@ i_s_innodb_buffer_page_lru_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page_lru =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5416,9 +5870,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -5437,10 +5895,11 @@ i_s_common_deinit(
DBUG_RETURN(0);
}
+/** SYS_TABLES ***************************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLES */
static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
{
-#define SYS_TABLE_ID 0
+#define SYS_TABLES_ID 0
{STRUCT_FLD(field_name, "TABLE_ID"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5449,7 +5908,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_NAME 1
+#define SYS_TABLES_NAME 1
{STRUCT_FLD(field_name, "NAME"),
STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5458,7 +5917,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_FLAG 2
+#define SYS_TABLES_FLAG 2
{STRUCT_FLD(field_name, "FLAG"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5467,7 +5926,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_NUM_COLUMN 3
+#define SYS_TABLES_NUM_COLUMN 3
{STRUCT_FLD(field_name, "N_COLS"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5476,7 +5935,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_SPACE 4
+#define SYS_TABLES_SPACE 4
{STRUCT_FLD(field_name, "SPACE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5485,6 +5944,33 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#define SYS_TABLES_FILE_FORMAT 5
+ {STRUCT_FLD(field_name, "FILE_FORMAT"),
+ STRUCT_FLD(field_length, 10),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLES_ROW_FORMAT 6
+ {STRUCT_FLD(field_name, "ROW_FORMAT"),
+ STRUCT_FLD(field_length, 12),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLES_ZIP_PAGE_SIZE 7
+ {STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
END_OF_ST_FIELD_INFO
};
@@ -5501,20 +5987,42 @@ i_s_dict_fill_sys_tables(
TABLE* table_to_fill) /*!< in/out: fill this table */
{
Field** fields;
+ ulint compact = DICT_TF_GET_COMPACT(table->flags);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table->flags);
+ ulint zip_size = dict_tf_get_zip_size(table->flags);
+ const char* file_format;
+ const char* row_format;
+
+ file_format = trx_sys_file_format_id_to_name(atomic_blobs);
+ if (!compact) {
+ row_format = "Redundant";
+ } else if (!atomic_blobs) {
+ row_format = "Compact";
+ } else if DICT_TF_GET_ZIP_SSIZE(table->flags) {
+ row_format = "Compressed";
+ } else {
+ row_format = "Dynamic";
+ }
DBUG_ENTER("i_s_dict_fill_sys_tables");
fields = table_to_fill->field;
- OK(fields[SYS_TABLE_ID]->store(longlong(table->id), TRUE));
+ OK(fields[SYS_TABLES_ID]->store(longlong(table->id), TRUE));
+
+ OK(field_store_string(fields[SYS_TABLES_NAME], table->name));
+
+ OK(fields[SYS_TABLES_FLAG]->store(table->flags));
- OK(field_store_string(fields[SYS_TABLE_NAME], table->name));
+ OK(fields[SYS_TABLES_NUM_COLUMN]->store(table->n_cols));
- OK(fields[SYS_TABLE_FLAG]->store(table->flags));
+ OK(fields[SYS_TABLES_SPACE]->store(table->space));
- OK(fields[SYS_TABLE_NUM_COLUMN]->store(table->n_cols));
+ OK(field_store_string(fields[SYS_TABLES_FILE_FORMAT], file_format));
- OK(fields[SYS_TABLE_SPACE]->store(table->space));
+ OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format));
+
+ OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store(zip_size));
OK(schema_table_store_record(thd, table_to_fill));
@@ -5614,7 +6122,7 @@ innodb_sys_tables_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tables =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5658,11 +6166,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tables =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
+/** SYS_TABLESTATS ***********************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLESTATS */
static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] =
{
@@ -5772,24 +6285,37 @@ i_s_dict_fill_sys_tablestats(
OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name));
+ dict_table_stats_lock(table, RW_S_LATCH);
+
if (table->stat_initialized) {
OK(field_store_string(fields[SYS_TABLESTATS_INIT],
"Initialized"));
+
+ OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows,
+ TRUE));
+
+ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
+ table->stat_clustered_index_size));
+
+ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
+ table->stat_sum_of_other_index_sizes));
+
+ OK(fields[SYS_TABLESTATS_MODIFIED]->store(
+ (ulint) table->stat_modified_counter));
} else {
OK(field_store_string(fields[SYS_TABLESTATS_INIT],
"Uninitialized"));
- }
- OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, TRUE));
+ OK(fields[SYS_TABLESTATS_NROW]->store(0, TRUE));
- OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
- table->stat_clustered_index_size));
+ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0));
- OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
- table->stat_sum_of_other_index_sizes));
+ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0));
- OK(fields[SYS_TABLESTATS_MODIFIED]->store(
- table->stat_modified_counter));
+ OK(fields[SYS_TABLESTATS_MODIFIED]->store(0));
+ }
+
+ dict_table_stats_unlock(table, RW_S_LATCH);
OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
@@ -5889,7 +6415,7 @@ innodb_sys_tablestats_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tablestats =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tablestats =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5933,11 +6459,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tablestats =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
+/** SYS_INDEXES **************************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_INDEXES */
static ST_FIELD_INFO innodb_sysindex_fields_info[] =
{
@@ -6022,17 +6553,12 @@ i_s_dict_fill_sys_indexes(
TABLE* table_to_fill) /*!< in/out: fill this table */
{
Field** fields;
- const char* name_ptr = index->name;
DBUG_ENTER("i_s_dict_fill_sys_indexes");
fields = table_to_fill->field;
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- OK(field_store_string(fields[SYS_INDEX_NAME], name_ptr));
+ OK(field_store_index_name(fields[SYS_INDEX_NAME], index->name));
OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
@@ -6144,7 +6670,7 @@ innodb_sys_indexes_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_indexes =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6188,12 +6714,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_COLUMNS */
+/** SYS_COLUMNS **************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_COLUMNS */
static ST_FIELD_INFO innodb_sys_columns_fields_info[] =
{
#define SYS_COLUMN_TABLE_ID 0
@@ -6379,7 +6910,7 @@ innodb_sys_columns_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_columns =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6423,11 +6954,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_fields */
+
+/** SYS_FIELDS ***************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FIELDS */
static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
{
#define SYS_FIELD_INDEX_ID 0
@@ -6586,7 +7123,7 @@ innodb_sys_fields_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_fields =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6630,12 +7167,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign */
+/** SYS_FOREIGN ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN */
static ST_FIELD_INFO innodb_sys_foreign_fields_info[] =
{
#define SYS_FOREIGN_ID 0
@@ -6720,6 +7262,7 @@ i_s_dict_fill_sys_foreign(
DBUG_RETURN(0);
}
+
/*******************************************************************//**
Function to populate INFORMATION_SCHEMA.innodb_sys_foreign table. Loop
through each record in SYS_FOREIGN, and extract the foreign key
@@ -6786,6 +7329,7 @@ i_s_sys_foreign_fill_table(
DBUG_RETURN(0);
}
+
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign
@return 0 on success */
@@ -6807,7 +7351,7 @@ innodb_sys_foreign_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6851,11 +7395,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign_cols */
+
+/** SYS_FOREIGN_COLS ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS */
static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] =
{
#define SYS_FOREIGN_COL_ID 0
@@ -7021,7 +7571,7 @@ innodb_sys_foreign_cols_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign_cols =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign_cols =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -7065,8 +7615,470 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign_cols =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+/** SYS_TABLESPACES ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.
+The SYS_TABLESPACES_* constants below are the column ordinals used by
+i_s_dict_fill_sys_tablespaces() when storing each row. */
+static ST_FIELD_INFO	innodb_sys_tablespaces_fields_info[] =
+{
+#define SYS_TABLESPACES_SPACE		0
+	{STRUCT_FLD(field_name,		"SPACE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_NAME		1
+	{STRUCT_FLD(field_name,		"NAME"),
+	 STRUCT_FLD(field_length,	MAX_FULL_NAME_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FLAGS		2
+	{STRUCT_FLD(field_name,		"FLAG"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FILE_FORMAT	3
+	{STRUCT_FLD(field_name,		"FILE_FORMAT"),
+	 STRUCT_FLD(field_length,	10),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_ROW_FORMAT	4
+	{STRUCT_FLD(field_name,		"ROW_FORMAT"),
+	 STRUCT_FLD(field_length,	22),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_PAGE_SIZE	5
+	{STRUCT_FLD(field_name,		"PAGE_SIZE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_ZIP_PAGE_SIZE	6
+	{STRUCT_FLD(field_name,		"ZIP_PAGE_SIZE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES with information
+collected by scanning the SYS_TABLESPACES table.
+@return 0 on success */
+static
+int
+i_s_dict_fill_sys_tablespaces(
+/*==========================*/
+	THD*		thd,		/*!< in: thread */
+	ulint		space,		/*!< in: space ID */
+	const char*	name,		/*!< in: tablespace name */
+	ulint		flags,		/*!< in: tablespace flags */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	Field**		fields;
+	ulint		atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
+	ulint		page_size = fsp_flags_get_page_size(flags);
+	ulint		zip_size = fsp_flags_get_zip_size(flags);
+	const char*	file_format;
+	const char*	row_format;
+
+	DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
+
+	/* The file format is derived from the atomic-blobs flag:
+	only Barracuda-and-later formats support atomic BLOBs. */
+	file_format = trx_sys_file_format_id_to_name(atomic_blobs);
+	if (!atomic_blobs) {
+		/* Pre-Barracuda tablespace flags cannot distinguish
+		between the Compact and Redundant row formats. */
+		row_format = "Compact or Redundant";
+	} else if (DICT_TF_GET_ZIP_SSIZE(flags)) {
+		/* NOTE(review): DICT_TF_GET_ZIP_SSIZE() is applied to
+		fsp flags here; confirm FSP_FLAGS_GET_ZIP_SSIZE() was
+		not intended. */
+		row_format = "Compressed";
+	} else {
+		row_format = "Dynamic";
+	}
+
+	fields = table_to_fill->field;
+
+	OK(fields[SYS_TABLESPACES_SPACE]->store(space));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_NAME], name));
+
+	OK(fields[SYS_TABLESPACES_FLAGS]->store(flags));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
+			      file_format));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT],
+			      row_format));
+
+	OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(page_size));
+
+	OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(zip_size));
+
+	/* Hand the completed row to the server layer. */
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
+Loop through each record in SYS_TABLESPACES, and extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
+@return 0 on success */
+static
+int
+i_s_sys_tablespaces_fill_table(
+/*===========================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("i_s_sys_tablespaces_fill_table");
+
+	/* deny access to user without PROCESS_ACL privilege */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint		space;
+		const char*	name;
+		ulint		flags;
+
+		/* Extract necessary information from a SYS_TABLESPACES row */
+		err_msg = dict_process_sys_tablespaces(
+			heap, rec, &space, &name, &flags);
+
+		/* Release the mini-transaction and the dict mutex before
+		handing the row to the server layer; both are re-acquired
+		below before fetching the next record. */
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);
+
+		if (!err_msg) {
+			i_s_dict_fill_sys_tablespaces(
+				thd, space, name, flags,
+				tables->table);
+		} else {
+			/* Report an unparsable row as a warning rather
+			than aborting the whole scan. */
+			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+					    ER_CANT_FIND_SYSTEM_REC, "%s",
+					    err_msg);
+		}
+
+		/* Reuse the heap for the next row's copies. */
+		mem_heap_empty(heap);
+
+		/* Get the next record */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES
+@return 0 on success */
+static
+int
+innodb_sys_tablespaces_init(
+/*========================*/
+	void*	p)	/*!< in/out: table schema object
+			(an ST_SCHEMA_TABLE*) */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_sys_tablespaces_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	/* Install the column definitions and the fill callback. */
+	schema->fields_info = innodb_sys_tablespaces_fields_info;
+	schema->fill_table = i_s_sys_tablespaces_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+/** Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES;
+the initializers are positional, so their order follows st_mysql_plugin. */
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_sys_tablespaces =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_SYS_TABLESPACES"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB SYS_TABLESPACES"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_sys_tablespaces_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL),
+
+	/* Plugin flags */
+	/* unsigned long */
+	STRUCT_FLD(flags, 0UL),
+};
+/** SYS_DATAFILES ************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES.
+The SYS_DATAFILES_* constants below are the column ordinals used by
+i_s_dict_fill_sys_datafiles() when storing each row. */
+static ST_FIELD_INFO	innodb_sys_datafiles_fields_info[] =
+{
+#define SYS_DATAFILES_SPACE		0
+	{STRUCT_FLD(field_name,		"SPACE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_DATAFILES_PATH		1
+	{STRUCT_FLD(field_name,		"PATH"),
+	 STRUCT_FLD(field_length,	OS_FILE_MAX_PATH),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_SYS_DATAFILES with information
+collected by scanning the SYS_DATAFILES table.
+@return 0 on success */
+static
+int
+i_s_dict_fill_sys_datafiles(
+/*========================*/
+	THD*		thd,		/*!< in: thread */
+	ulint		space,		/*!< in: space ID */
+	const char*	path,		/*!< in: absolute path */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	Field**	fields;
+
+	DBUG_ENTER("i_s_dict_fill_sys_datafiles");
+
+	fields = table_to_fill->field;
+
+	OK(field_store_ulint(fields[SYS_DATAFILES_SPACE], space));
+
+	OK(field_store_string(fields[SYS_DATAFILES_PATH], path));
+
+	/* Hand the completed row to the server layer. */
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
+Loop through each record in SYS_DATAFILES, and extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
+@return 0 on success */
+static
+int
+i_s_sys_datafiles_fill_table(
+/*=========================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("i_s_sys_datafiles_fill_table");
+
+	/* deny access to user without PROCESS_ACL privilege */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_DATAFILES);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint		space;
+		const char*	path;
+
+		/* Extract necessary information from a SYS_DATAFILES row */
+		err_msg = dict_process_sys_datafiles(
+			heap, rec, &space, &path);
+
+		/* Release the mini-transaction and the dict mutex before
+		handing the row to the server layer; both are re-acquired
+		below before fetching the next record. */
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);
+
+		if (!err_msg) {
+			i_s_dict_fill_sys_datafiles(
+				thd, space, path, tables->table);
+		} else {
+			/* Report an unparsable row as a warning rather
+			than aborting the whole scan. */
+			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+					    ER_CANT_FIND_SYSTEM_REC, "%s",
+					    err_msg);
+		}
+
+		/* Reuse the heap for the next row's copies. */
+		mem_heap_empty(heap);
+
+		/* Get the next record */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES
+@return 0 on success */
+static
+int
+innodb_sys_datafiles_init(
+/*======================*/
+	void*	p)	/*!< in/out: table schema object
+			(an ST_SCHEMA_TABLE*) */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_sys_datafiles_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	/* Install the column definitions and the fill callback. */
+	schema->fields_info = innodb_sys_datafiles_fields_info;
+	schema->fill_table = i_s_sys_datafiles_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+/** Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_DATAFILES;
+the initializers are positional, so their order follows st_mysql_plugin. */
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_sys_datafiles =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_SYS_DATAFILES"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB SYS_DATAFILES"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_sys_datafiles_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL),
+
+	/* Plugin flags */
+	/* unsigned long */
+	STRUCT_FLD(flags, 0UL),
+};
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index 7fc7b091795..9e3e651706a 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,30 +28,34 @@ Created July 18, 2007 Vasil Dimov
const char plugin_author[] = "Oracle Corporation";
-extern struct st_maria_plugin i_s_innodb_trx;
-extern struct st_maria_plugin i_s_innodb_locks;
-extern struct st_maria_plugin i_s_innodb_lock_waits;
-extern struct st_maria_plugin i_s_innodb_cmp;
-extern struct st_maria_plugin i_s_innodb_cmp_reset;
-extern struct st_maria_plugin i_s_innodb_cmpmem;
-extern struct st_maria_plugin i_s_innodb_cmpmem_reset;
-extern struct st_maria_plugin i_s_innodb_metrics;
-extern struct st_maria_plugin i_s_innodb_ft_default_stopword;
-extern struct st_maria_plugin i_s_innodb_ft_inserted;
-extern struct st_maria_plugin i_s_innodb_ft_deleted;
-extern struct st_maria_plugin i_s_innodb_ft_being_deleted;
-extern struct st_maria_plugin i_s_innodb_ft_index_cache;
-extern struct st_maria_plugin i_s_innodb_ft_index_table;
-extern struct st_maria_plugin i_s_innodb_ft_config;
-extern struct st_maria_plugin i_s_innodb_buffer_page;
-extern struct st_maria_plugin i_s_innodb_buffer_page_lru;
-extern struct st_maria_plugin i_s_innodb_buffer_stats;
-extern struct st_maria_plugin i_s_innodb_sys_tables;
-extern struct st_maria_plugin i_s_innodb_sys_tablestats;
-extern struct st_maria_plugin i_s_innodb_sys_indexes;
-extern struct st_maria_plugin i_s_innodb_sys_columns;
-extern struct st_maria_plugin i_s_innodb_sys_fields;
-extern struct st_maria_plugin i_s_innodb_sys_foreign;
-extern struct st_maria_plugin i_s_innodb_sys_foreign_cols;
+extern struct st_mysql_plugin i_s_innodb_trx;
+extern struct st_mysql_plugin i_s_innodb_locks;
+extern struct st_mysql_plugin i_s_innodb_lock_waits;
+extern struct st_mysql_plugin i_s_innodb_cmp;
+extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+extern struct st_mysql_plugin i_s_innodb_cmp_per_index;
+extern struct st_mysql_plugin i_s_innodb_cmp_per_index_reset;
+extern struct st_mysql_plugin i_s_innodb_cmpmem;
+extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
+extern struct st_mysql_plugin i_s_innodb_metrics;
+extern struct st_mysql_plugin i_s_innodb_ft_default_stopword;
+extern struct st_mysql_plugin i_s_innodb_ft_inserted;
+extern struct st_mysql_plugin i_s_innodb_ft_deleted;
+extern struct st_mysql_plugin i_s_innodb_ft_being_deleted;
+extern struct st_mysql_plugin i_s_innodb_ft_index_cache;
+extern struct st_mysql_plugin i_s_innodb_ft_index_table;
+extern struct st_mysql_plugin i_s_innodb_ft_config;
+extern struct st_mysql_plugin i_s_innodb_buffer_page;
+extern struct st_mysql_plugin i_s_innodb_buffer_page_lru;
+extern struct st_mysql_plugin i_s_innodb_buffer_stats;
+extern struct st_mysql_plugin i_s_innodb_sys_tables;
+extern struct st_mysql_plugin i_s_innodb_sys_tablestats;
+extern struct st_mysql_plugin i_s_innodb_sys_indexes;
+extern struct st_mysql_plugin i_s_innodb_sys_columns;
+extern struct st_mysql_plugin i_s_innodb_sys_fields;
+extern struct st_mysql_plugin i_s_innodb_sys_foreign;
+extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols;
+extern struct st_mysql_plugin i_s_innodb_sys_tablespaces;
+extern struct st_mysql_plugin i_s_innodb_sys_datafiles;
#endif /* i_s_h */
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index cd9de39f3c6..168da732bc0 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -25,6 +25,10 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0ibuf.h"
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/** Number of bits describing a single page */
#define IBUF_BITS_PER_PAGE 4
#if IBUF_BITS_PER_PAGE % 2
@@ -56,6 +60,7 @@ Created 7/19/1997 Heikki Tuuri
#include "log0recv.h"
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
+#include "ha_prototypes.h"
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -284,16 +289,16 @@ type, counter, and some flags. */
/** The mutex used to block pessimistic inserts to ibuf trees */
-static mutex_t ibuf_pessimistic_insert_mutex;
+static ib_mutex_t ibuf_pessimistic_insert_mutex;
/** The mutex protecting the insert buffer structs */
-static mutex_t ibuf_mutex;
+static ib_mutex_t ibuf_mutex;
/** The mutex protecting the insert buffer bitmaps */
-static mutex_t ibuf_bitmap_mutex;
+static ib_mutex_t ibuf_bitmap_mutex;
/** The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8
+#define IBUF_MERGE_AREA 8UL
/** Inside the merge area, pages which have at most 1 per this number less
buffered entries compared to maximum volume that can buffered for a single
@@ -507,7 +512,7 @@ ibuf_init_at_db_start(void)
dict_index_t* index;
ulint n_used;
page_t* header_page;
- ulint error;
+ dberr_t error;
ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
@@ -2485,6 +2490,73 @@ ibuf_get_merge_page_nos_func(
return(sum_volumes);
}
+/*******************************************************************//**
+Get the matching records for space id.
+@return current rec or NULL */
+static __attribute__((nonnull, warn_unused_result))
+const rec_t*
+ibuf_get_user_rec(
+/*===============*/
+ btr_pcur_t* pcur, /*!< in: the current cursor */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ do {
+ const rec_t* rec = btr_pcur_get_rec(pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ return(rec);
+ }
+ } while (btr_pcur_move_to_next(pcur, mtr));
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Reads page numbers for a space id from an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_get_merge_pages(
+/*=================*/
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ ulint space, /*!< in: space for which to merge */
+ ulint limit, /*!< in: max page numbers to read */
+ ulint* pages, /*!< out: pages read */
+ ulint* spaces, /*!< out: spaces read */
+ ib_int64_t* versions,/*!< out: space versions read */
+ ulint* n_pages,/*!< out: number of pages read */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ const rec_t* rec;
+ ulint volume = 0;
+ ib_int64_t version = fil_space_get_version(space);
+
+ ut_a(space != ULINT_UNDEFINED);
+
+ *n_pages = 0;
+
+ while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0
+ && ibuf_rec_get_space(mtr, rec) == space
+ && *n_pages < limit) {
+
+ ulint page_no = ibuf_rec_get_page_no(mtr, rec);
+
+ if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
+ spaces[*n_pages] = space;
+ pages[*n_pages] = page_no;
+ versions[*n_pages] = version;
+ ++*n_pages;
+ }
+
+ volume += ibuf_rec_get_volume(mtr, rec);
+
+ btr_pcur_move_to_next(pcur, mtr);
+ }
+
+ return(volume);
+}
+
/*********************************************************************//**
Contracts insert buffer trees by reading pages to the buffer pool.
@return a lower limit for the combined size in bytes of entries which
@@ -2492,32 +2564,22 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
static
ulint
-ibuf_contract_ext(
-/*==============*/
- ulint* n_pages,/*!< out: number of pages to which merged */
- ibool sync) /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
+ibuf_merge_pages(
+/*=============*/
+ ulint* n_pages, /*!< out: number of pages to which merged */
+ bool sync) /*!< in: TRUE if the caller wants to wait for
+ the issued read with the highest tablespace
+ address to complete */
{
+ mtr_t mtr;
btr_pcur_t pcur;
+ ulint sum_sizes;
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint sum_sizes;
- mtr_t mtr;
*n_pages = 0;
- /* We perform a dirty read of ibuf->empty, without latching
- the insert buffer root page. We trust this dirty read except
- when a slow shutdown is being executed. During a slow
- shutdown, the insert buffer merge must be completed. */
-
- if (UNIV_UNLIKELY(ibuf->empty)
- && UNIV_LIKELY(!srv_shutdown_state)) {
- return(0);
- }
-
ibuf_mtr_start(&mtr);
/* Open a cursor to a randomly chosen leaf of the tree, at a random
@@ -2554,18 +2616,159 @@ ibuf_contract_ext(
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
- buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- *n_pages);
+ buf_read_ibuf_merge_pages(
+ sync, space_ids, space_versions, page_nos, *n_pages);
return(sum_sizes + 1);
}
/*********************************************************************//**
+Get the table instance from the table id.
+@return table instance */
+static __attribute__((warn_unused_result))
+dict_table_t*
+ibuf_get_table(
+/*===========*/
+ table_id_t table_id) /*!< in: valid table id */
+{
+ rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+
+ dict_table_t* table = dict_table_open_on_id(table_id, FALSE, FALSE);
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ return(table);
+}
+
+/*********************************************************************//**
Contracts insert buffer trees by reading pages to the buffer pool.
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
-UNIV_INTERN
+static
+ulint
+ibuf_merge_space(
+/*=============*/
+ ulint space, /*!< in: tablespace id to merge */
+ ulint* n_pages)/*!< out: number of pages to which merged */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ mem_heap_t* heap = mem_heap_create(512);
+ dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap);
+
+ ibuf_mtr_start(&mtr);
+
+ /* Position the cursor on the first matching record. */
+
+ btr_pcur_open(
+ ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+
+ mem_heap_free(heap);
+
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ ulint sum_sizes = 0;
+ ulint pages[IBUF_MAX_N_PAGES_MERGED];
+ ulint spaces[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
+
+ if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ } else {
+
+ sum_sizes = ibuf_get_merge_pages(
+ &pcur, space, IBUF_MAX_N_PAGES_MERGED,
+ &pages[0], &spaces[0], &versions[0], n_pages,
+ &mtr);
+
+ ++sum_sizes;
+ }
+
+ ibuf_mtr_commit(&mtr);
+
+ btr_pcur_close(&pcur);
+
+ if (sum_sizes > 0) {
+
+ ut_a(*n_pages > 0 || sum_sizes == 1);
+
+#ifdef UNIV_DEBUG
+ ut_ad(*n_pages <= UT_ARR_SIZE(pages));
+
+ for (ulint i = 0; i < *n_pages; ++i) {
+ ut_ad(spaces[i] == space);
+ ut_ad(i == 0 || versions[i] == versions[i - 1]);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_read_ibuf_merge_pages(
+ TRUE, spaces, versions, pages, *n_pages);
+ }
+
+ return(sum_sizes);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_merge(
+/*=======*/
+ table_id_t table_id, /*!< in: if merge should be
+ done only for a specific
+ table, for all tables this
+ should be 0 */
+ ulint* n_pages, /*!< out: number of pages to
+ which merged */
+ bool sync) /*!< in: TRUE if the caller
+ wants to wait for the issued
+ read with the highest
+ tablespace address to complete */
+{
+ dict_table_t* table;
+
+ *n_pages = 0;
+
+ /* We perform a dirty read of ibuf->empty, without latching
+ the insert buffer root page. We trust this dirty read except
+ when a slow shutdown is being executed. During a slow
+ shutdown, the insert buffer merge must be completed. */
+
+ if (ibuf->empty && !srv_shutdown_state) {
+ return(0);
+ } else if (table_id == 0) {
+ return(ibuf_merge_pages(n_pages, sync));
+ } else if ((table = ibuf_get_table(table_id)) == 0) {
+ /* Table has been dropped. */
+ return(0);
+ }
+
+ ulint volume = ibuf_merge_space(table->space, n_pages);
+
+ dict_table_close(table, FALSE, FALSE);
+
+ return(volume);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
ulint
ibuf_contract(
/*==========*/
@@ -2575,7 +2778,7 @@ ibuf_contract(
{
ulint n_pages;
- return(ibuf_contract_ext(&n_pages, sync));
+ return(ibuf_merge(0, &n_pages, sync));
}
/*********************************************************************//**
@@ -2587,17 +2790,26 @@ UNIV_INTERN
ulint
ibuf_contract_in_background(
/*========================*/
- ibool full) /*!< in: TRUE if the caller wants to do a full
- contract based on PCT_IO(100). If FALSE then
- the size of contract batch is determined based
- on the current size of the ibuf tree. */
+ table_id_t table_id, /*!< in: if merge should be done only
+ for a specific table, for all tables
+ this should be 0 */
+ ibool full) /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
{
ulint sum_bytes = 0;
ulint sum_pages = 0;
- ulint n_bytes;
ulint n_pag2;
ulint n_pages;
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (srv_ibuf_disable_background_merge && table_id == 0) {
+ return(0);
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
if (full) {
/* Caller has requested a full batch */
n_pages = PCT_IO(100);
@@ -2620,7 +2832,9 @@ ibuf_contract_in_background(
}
while (sum_pages < n_pages) {
- n_bytes = ibuf_contract_ext(&n_pag2, FALSE);
+ ulint n_bytes;
+
+ n_bytes = ibuf_merge(table_id, &n_pag2, FALSE);
if (n_bytes == 0) {
return(sum_bytes);
@@ -3061,7 +3275,7 @@ ibuf_update_max_tablespace_id(void)
ibuf_mtr_start(&mtr);
btr_pcur_open_at_index_side(
- FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
@@ -3223,8 +3437,8 @@ ibuf_get_entry_counter_func(
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
ibuf_insert_low(
/*============*/
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
@@ -3246,7 +3460,9 @@ ibuf_insert_low(
btr_pcur_t pcur;
btr_cur_t* cursor;
dtuple_t* ibuf_entry;
+ mem_heap_t* offsets_heap = NULL;
mem_heap_t* heap;
+ ulint* offsets = NULL;
ulint buffered;
lint min_n_recs;
rec_t* ins_rec;
@@ -3254,7 +3470,7 @@ ibuf_insert_low(
page_t* bitmap_page;
buf_block_t* block;
page_t* root;
- ulint err;
+ dberr_t err;
ibool do_merge;
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
@@ -3294,7 +3510,7 @@ ibuf_insert_low(
return(DB_STRONG_FAIL);
}
- heap = mem_heap_create(512);
+ heap = mem_heap_create(1024);
/* Build the entry which contains the space id and the page number
as the first fields and the type information for other fields, and
@@ -3464,9 +3680,11 @@ fail_exit:
cursor = btr_pcur_get_btr_cur(&pcur);
if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
+ err = btr_cur_optimistic_insert(
+ BTR_NO_LOCKING_FLAG,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
+ &dummy_big_rec, 0, thr, &mtr);
block = btr_cur_get_block(cursor);
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
@@ -3493,13 +3711,15 @@ fail_exit:
err = btr_cur_optimistic_insert(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, ibuf_entry, &ins_rec,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, ibuf_entry, &ins_rec,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
}
@@ -3512,6 +3732,10 @@ fail_exit:
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
}
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
/* Update the page max trx id field */
page_update_max_trx_id(block, NULL,
@@ -3568,7 +3792,7 @@ ibuf_insert(
ulint page_no,/*!< in: page number where to insert */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ulint entry_size;
ibool no_counter;
/* Read the settable global variable ibuf_use only once in
@@ -3699,7 +3923,7 @@ skip_watch:
/********************************************************************//**
During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */
-static
+static __attribute__((nonnull))
void
ibuf_insert_to_index_page_low(
/*==========================*/
@@ -3707,6 +3931,8 @@ ibuf_insert_to_index_page_low(
buf_block_t* block, /*!< in/out: index page where the buffered
entry should be placed */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr, /*!< in/out: mtr */
page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
after which to insert the buffered entry */
@@ -3718,8 +3944,8 @@ ibuf_insert_to_index_page_low(
const page_t* bitmap_page;
ulint old_bits;
- if (UNIV_LIKELY
- (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
+ if (page_cur_tuple_insert(
+ page_cur, entry, index, offsets, &heap, 0, mtr) != NULL) {
return;
}
@@ -3730,8 +3956,8 @@ ibuf_insert_to_index_page_low(
/* This time the record must fit */
- if (UNIV_LIKELY
- (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
+ if (page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr) != NULL) {
return;
}
@@ -3785,6 +4011,8 @@ ibuf_insert_to_index_page(
ulint low_match;
page_t* page = buf_block_get_frame(block);
rec_t* rec;
+ ulint* offsets;
+ mem_heap_t* heap;
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
@@ -3835,10 +4063,14 @@ dump:
low_match = page_cur_search(block, index, entry,
PAGE_CUR_LE, &page_cur);
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + REC_OFFS_HEADER_SIZE * sizeof(*offsets)
+ + dtuple_get_n_fields(entry)
+ * (sizeof(upd_field_t) + sizeof *offsets));
+
if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
- mem_heap_t* heap;
upd_t* update;
- ulint* offsets;
page_zip_des_t* page_zip;
rec = page_cur_get_rec(&page_cur);
@@ -3847,12 +4079,10 @@ dump:
row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
- heap = mem_heap_create(1024);
-
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
&heap);
update = row_upd_build_sec_rec_difference_binary(
- index, entry, rec, NULL, heap);
+ rec, index, offsets, entry, heap);
page_zip = buf_block_get_page_zip(block);
@@ -3862,9 +4092,7 @@ dump:
Bug #56680 was fixed. */
btr_cur_set_deleted_flag_for_ibuf(
rec, page_zip, FALSE, mtr);
-updated_in_place:
- mem_heap_free(heap);
- return;
+ goto updated_in_place;
}
/* Copy the info bits. Clear the delete-mark. */
@@ -3908,15 +4136,20 @@ updated_in_place:
lock_rec_store_on_page_infimum(block, rec);
page_cur_delete_rec(&page_cur, index, offsets, mtr);
page_cur_move_to_prev(&page_cur);
- mem_heap_free(heap);
- ibuf_insert_to_index_page_low(entry, block, index, mtr,
+ ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
&page_cur);
lock_rec_restore_from_page_infimum(block, rec, block);
} else {
- ibuf_insert_to_index_page_low(entry, block, index, mtr,
+ offsets = NULL;
+ ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
&page_cur);
}
+
+updated_in_place:
+ mem_heap_free(heap);
}
/****************************************************************//**
@@ -3950,7 +4183,7 @@ ibuf_set_del_mark(
/* Delete mark the old index record. According to a
comment in row_upd_sec_index_entry(), it can already
have been delete marked if a lock wait occurred in
- row_ins_index_entry() in a previous invocation of
+ row_ins_sec_index_entry() in a previous invocation of
row_upd_sec_index_entry(). */
if (UNIV_LIKELY
@@ -4128,7 +4361,7 @@ ibuf_restore_pos(
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
- if (!btr_validate_index(ibuf->index, NULL)) {
+ if (!btr_validate_index(ibuf->index, 0)) {
ut_error;
}
@@ -4160,7 +4393,7 @@ ibuf_delete_rec(
{
ibool success;
page_t* root;
- ulint err;
+ dberr_t err;
ut_ad(ibuf_inside(mtr));
ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
@@ -4183,7 +4416,8 @@ ibuf_delete_rec(
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
- success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
+ success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
+ 0, mtr);
if (success) {
if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
@@ -4241,7 +4475,7 @@ ibuf_delete_rec(
root = ibuf_tree_root_get(mtr);
- btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
+ btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
@@ -4829,4 +5063,109 @@ ibuf_print(
mutex_exit(&ibuf_mutex);
}
+
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+{
+ ulint zip_size;
+ ulint page_size;
+ ulint size;
+ ulint page_no;
+
+ ut_ad(space_id);
+ ut_ad(trx->mysql_thd);
+
+ zip_size = fil_space_get_zip_size(space_id);
+
+ if (zip_size == ULINT_UNDEFINED) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ size = fil_space_get_size(space_id);
+
+ if (size == 0) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ mutex_enter(&ibuf_mutex);
+
+ page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+ for (page_no = 0; page_no < size; page_no += page_size) {
+ mtr_t mtr;
+ page_t* bitmap_page;
+ ulint i;
+
+ if (trx_is_interrupted(trx)) {
+ mutex_exit(&ibuf_mutex);
+ return(DB_INTERRUPTED);
+ }
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ ibuf_enter(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space_id, page_no, zip_size, &mtr);
+
+ for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
+ const ulint offset = page_no + i;
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_IBUF, &mtr)) {
+
+ mutex_exit(&ibuf_mutex);
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Space %u page %u"
+ " is wrongly flagged to belong to the"
+ " insert buffer",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ return(DB_CORRUPTION);
+ }
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, &mtr)) {
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Buffered changes"
+ " for space %u page %u are lost",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ /* Tolerate this error, so that
+ slightly corrupted tables can be
+ imported and dumped. Clear the bit. */
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+ }
+ }
+
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+ }
+
+ mutex_exit(&ibuf_mutex);
+ return(DB_SUCCESS);
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
new file mode 100644
index 00000000000..5b7bfdbdde5
--- /dev/null
+++ b/storage/innobase/include/api0api.h
@@ -0,0 +1,1282 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0api.h
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains.
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#ifndef api0api_h
+#define api0api_h
+
+#include "db0err.h"
+#include <stdio.h>
+
+#ifdef _MSC_VER
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define UNIV_NO_IGNORE __attribute__ ((warn_unused_result))
+#else
+#define UNIV_NO_IGNORE
+#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
+
+/* See comment about ib_bool_t as to why the two macros are unsigned long. */
+/** The boolean value of "true" used internally within InnoDB */
+#define IB_TRUE 0x1UL
+/** The boolean value of "false" used internally within InnoDB */
+#define IB_FALSE 0x0UL
+
+/* Basic types used by the InnoDB API. */
+/** All InnoDB error codes are represented by ib_err_t */
+typedef enum dberr_t ib_err_t;
+/** Representation of a byte within InnoDB */
+typedef unsigned char ib_byte_t;
+/** Representation of an unsigned long int within InnoDB */
+typedef unsigned long int ib_ulint_t;
+
+/* We assume C99 support except when using VisualStudio. */
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif /* _MSC_VER */
+
+/* Integer types used by the API. Microsft VS defines its own types
+and we use the Microsoft types when building with Visual Studio. */
+#if defined(_MSC_VER)
+/** A signed 8 bit integral type. */
+typedef __int8 ib_i8_t;
+#else
+/** A signed 8 bit integral type. */
+typedef int8_t ib_i8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 8 bit integral type. */
+typedef unsigned __int8 ib_u8_t;
+#else
+/** An unsigned 8 bit integral type. */
+typedef uint8_t ib_u8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 16 bit integral type. */
+typedef __int16 ib_i16_t;
+#else
+/** A signed 16 bit integral type. */
+typedef int16_t ib_i16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 16 bit integral type. */
+typedef unsigned __int16 ib_u16_t;
+#else
+/** An unsigned 16 bit integral type. */
+typedef uint16_t ib_u16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 32 bit integral type. */
+typedef __int32 ib_i32_t;
+#else
+/** A signed 32 bit integral type. */
+typedef int32_t ib_i32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 32 bit integral type. */
+typedef unsigned __int32 ib_u32_t;
+#else
+/** An unsigned 32 bit integral type. */
+typedef uint32_t ib_u32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 64 bit integral type. */
+typedef __int64 ib_i64_t;
+#else
+/** A signed 64 bit integral type. */
+typedef int64_t ib_i64_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 64 bit integral type. */
+typedef unsigned __int64 ib_u64_t;
+#else
+/** An unsigned 64 bit integral type. */
+typedef uint64_t ib_u64_t;
+#endif
+
+typedef void* ib_opaque_t;
+typedef ib_opaque_t ib_charset_t;
+typedef ib_ulint_t ib_bool_t;
+typedef ib_u64_t ib_id_u64_t;
+
+/** @enum ib_cfg_type_t Possible types for a configuration variable. */
+typedef enum {
+ IB_CFG_IBOOL, /*!< The configuration parameter is
+ of type ibool */
+
+ /* XXX Can we avoid having different types for ulint and ulong?
+ - On Win64 "unsigned long" is 32 bits
+ - ulong is always defined as "unsigned long"
+ - On Win64 ulint is defined as 64 bit integer
+ => On Win64 ulint != ulong.
+ If we typecast all ulong and ulint variables to the smaller type
+ ulong, then we will cut the range of the ulint variables.
+ This is not a problem for most ulint variables because their max
+ allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
+ but its max allowed value is 10). BUT buffer_pool_size and
+ log_file_size allow up to 2^64-1. */
+
+ IB_CFG_ULINT, /*!< The configuration parameter is
+ of type ulint */
+
+ IB_CFG_ULONG, /*!< The configuration parameter is
+ of type ulong */
+
+ IB_CFG_TEXT, /*!< The configuration parameter is
+ of type char* */
+
+ IB_CFG_CB /*!< The configuration parameter is
+ a callback parameter */
+} ib_cfg_type_t;
+
+/** @enum ib_col_type_t column types that are supported. */
+typedef enum {
+ IB_VARCHAR = 1, /*!< Character varying length. The
+ column is not padded. */
+
+ IB_CHAR = 2, /*!< Fixed length character string. The
+ column is padded to the right. */
+
+ IB_BINARY = 3, /*!< Fixed length binary, similar to
+ IB_CHAR but the column is not padded
+ to the right. */
+
+ IB_VARBINARY = 4, /*!< Variable length binary */
+
+ IB_BLOB = 5, /*!< Binary large object, or
+ a TEXT type */
+
+ IB_INT = 6, /*!< Integer: can be any size
+ from 1 - 8 bytes. If the size is
+ 1, 2, 4 and 8 bytes then you can use
+ the typed read and write functions. For
+ other sizes you will need to use the
+ ib_col_get_value() function and do the
+ conversion yourself. */
+
+ IB_SYS = 8, /*!< System column, this column can
+ be one of DATA_TRX_ID, DATA_ROLL_PTR
+ or DATA_ROW_ID. */
+
+ IB_FLOAT = 9, /*!< C (float) floating point value. */
+
+ IB_DOUBLE = 10, /*!> C (double) floating point value. */
+
+ IB_DECIMAL = 11, /*!< Decimal stored as an ASCII
+ string */
+
+ IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */
+
+ IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */
+
+} ib_col_type_t;
+
+/** @enum ib_tbl_fmt_t InnoDB table format types */
+typedef enum {
+ IB_TBL_REDUNDANT, /*!< Redundant row format, the column
+ type and length is stored in the row.*/
+
+ IB_TBL_COMPACT, /*!< Compact row format, the column
+ type is not stored in the row. The
+ length is stored in the row but the
+ storage format uses a compact format
+ to store the length of the column data
+ and record data storage format also
+ uses less storage. */
+
+ IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes
+ are not stored in the clustered index */
+
+ IB_TBL_COMPRESSED /*!< Similar to dynamic format but
+ with pages compressed */
+} ib_tbl_fmt_t;
+
+/** @enum ib_col_attr_t InnoDB column attributes */
+typedef enum {
+ IB_COL_NONE = 0, /*!< No special attributes. */
+
+ IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */
+
+ IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */
+
+ IB_COL_NOT_USED = 4, /*!< Future use, reserved. */
+
+ IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+} ib_col_attr_t;
+
+/* Note: must match lock0types.h */
+/** @enum ib_lck_mode_t InnoDB lock modes. */
+typedef enum {
+ IB_LOCK_IS = 0, /*!< Intention shared, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_IX, /*!< Intention exclusive, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_S, /*!< Shared locks should be used to
+ lock rows */
+
+ IB_LOCK_X, /*!< Exclusive locks should be used to
+ lock rows*/
+
+ IB_LOCK_TABLE_X, /*!< exclusive table lock */
+
+ IB_LOCK_NONE, /*!< This is used internally to note
+ consistent read */
+
+ IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */
+} ib_lck_mode_t;
+
+typedef enum {
+ IB_CLUSTERED = 1, /*!< clustered index */
+ IB_UNIQUE = 2 /*!< unique index */
+} ib_index_type_t;
+
+/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
+Note: Values must match those found in page0cur.h */
+typedef enum {
+ IB_CUR_G = 1, /*!< If search key is not found then
+ position the cursor on the row that
+ is greater than the search key */
+
+ IB_CUR_GE = 2, /*!< If the search key not found then
+ position the cursor on the row that
+ is greater than or equal to the search
+ key */
+
+ IB_CUR_L = 3, /*!< If search key is not found then
+ position the cursor on the row that
+ is less than the search key */
+
+ IB_CUR_LE = 4 /*!< If search key is not found then
+ position the cursor on the row that
+ is less than or equal to the search
+ key */
+} ib_srch_mode_t;
+
+/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
+typedef enum {
+ IB_CLOSEST_MATCH, /*!< Closest match possible */
+
+ IB_EXACT_MATCH, /*!< Search using a complete key
+ value */
+
+ IB_EXACT_PREFIX /*!< Search using a key prefix which
+ must match to rows: the prefix may
+ contain an incomplete field (the
+ last field in prefix may be just
+ a prefix of a fixed length column) */
+} ib_match_mode_t;
+
+/** @struct ib_col_meta_t InnoDB column meta data. */
+typedef struct {
+ ib_col_type_t type; /*!< Type of the column */
+
+ ib_col_attr_t attr; /*!< Column attributes */
+
+ ib_u32_t type_len; /*!< Length of type */
+
+ ib_u16_t client_type; /*!< 16 bits of data relevant only to
+ the client. InnoDB doesn't care */
+
+ ib_charset_t* charset; /*!< Column charset */
+} ib_col_meta_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_state_t The transaction state can be queried using the
+ib_trx_state() function. The InnoDB deadlock monitor can roll back a
+transaction and users should be prepared for this, especially where there
+is high contention. The way to determine the state of the transaction is to
+query it's state and check. */
+typedef enum {
+ IB_TRX_NOT_STARTED, /*!< Has not started yet, the
+ transaction has not ben started yet.*/
+
+ IB_TRX_ACTIVE, /*!< The transaction is currently
+ active and needs to be either
+ committed or rolled back. */
+
+ IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */
+
+ IB_TRX_PREPARED /*!< Support for 2PC/XA */
+} ib_trx_state_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_level_t Transaction isolation levels */
+typedef enum {
+ IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are
+ performed so that we do not look at a
+ possible earlier version of a record;
+ thus they are not 'consistent' reads
+ under this isolation level; otherwise
+ like level 2 */
+
+ IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation,
+ except that in range UPDATE and DELETE
+ we must block phantom rows with
+ next-key locks; SELECT ... FOR UPDATE
+ and ... LOCK IN SHARE MODE only lock
+ the index records, NOT the gaps before
+ them, and thus allow free inserting;
+ each consistent read reads its own
+ snapshot */
+
+ IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same
+ trx read the same snapshot; full
+ next-key locking used in locking reads
+ to block insertions into gaps */
+
+ IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to
+ LOCK IN SHARE MODE reads */
+} ib_trx_level_t;
+
+/** Generic InnoDB callback prototype. */
+typedef void (*ib_cb_t)(void);
+
+#define IB_CFG_BINLOG_ENABLED 0x1
+#define IB_CFG_MDL_ENABLED 0x2
+#define IB_CFG_DISABLE_ROWLOCK 0x4
+
+/** The first argument to the InnoDB message logging function. By default
+it's set to stderr. You should treat ib_msg_stream_t as a void*, since
+it will probably change in the future. */
+typedef FILE* ib_msg_stream_t;
+
+/** All log messages are written to this function. It should have the same
+behavior as fprintf(3). */
+typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
+
+/* Note: This is to make it easy for API users to have type
+checking for arguments to our functions. Making it ib_opaque_t
+by itself will result in pointer decay resulting in subverting
+of the compiler's type checking. */
+
+/** InnoDB tuple handle. This handle can refer to either a cluster index
+tuple or a secondary index tuple. There are two types of tuples for each
+type of index, making a total of four types of tuple handles. There
+is a tuple for reading the entire row contents and another for searching
+on the index key. */
+typedef struct ib_tuple_t* ib_tpl_t;
+
+/** InnoDB transaction handle, all database operations need to be covered
+by transactions. This handle represents a transaction. The handle can be
+created with ib_trx_begin(), you commit your changes with ib_trx_commit()
+and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
+monitor rolls back the transaction then you need to free the transaction
+using the function ib_trx_release(). You can query the state of an InnoDB
+transaction by calling ib_trx_state(). */
+typedef struct trx_t* ib_trx_t;
+
+/** InnoDB cursor handle */
+typedef struct ib_cursor_t* ib_crsr_t;
+
+/*************************************************************//**
+This function is used to compare two data fields for which the data type
+is such that we must use the client code to compare them.
+
+@param col_meta column meta data
+@param p1 key
+@param p1_len key length
+@param p2 second key
+@param p2_len second key length
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
+
+typedef int (*ib_client_cmp_t)(
+ const ib_col_meta_t* col_meta,
+ const ib_byte_t* p1,
+ ib_ulint_t p1_len,
+ const ib_byte_t* p2,
+ ib_ulint_t p2_len);
+
+/* This should be the same as univ.i */
+/** Represents SQL_NULL length */
+#define IB_SQL_NULL 0xFFFFFFFF
+/** The number of system columns in a row. */
+#define IB_N_SYS_COLS 3
+
+/** The maximum length of a text column. */
+#define MAX_TEXT_LEN 4096
+
+/* MySQL uses 3 byte UTF-8 encoding. */
+/** The maximum length of a column name in a table schema. */
+#define IB_MAX_COL_NAME_LEN (64 * 3)
+
+/** The maximum length of a table name (plus database name).
+Fully parenthesized so the macro is safe inside any expression
+(e.g. division or shift); the value is unchanged: 384. */
+#define IB_MAX_TABLE_NAME_LEN ((64 * 3) * 2)
+
+/*****************************************************************//**
+Start a transaction that's been rolled back. This special function
+exists for the case when InnoDB's deadlock detector has rolled back
+a transaction. While the transaction has been rolled back the handle
+is still valid and can be reused by calling this function. If you
+don't want to reuse the transaction handle then you can free the handle
+by calling ib_trx_release().
+@return innobase txn handle */
+
+ib_err_t
+ib_trx_start(
+/*=========*/
+ ib_trx_t ib_trx, /*!< in: transaction to restart */
+ ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
+ void* thd); /*!< in: THD */
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle and
+put the transaction in the active state.
+@return innobase txn handle */
+
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+ ib_trx_level_t ib_trx_level); /*!< in: trx isolation level */
+
+/*****************************************************************//**
+Query the transaction's state. This function can be used to check for
+the state of the transaction in case it has been rolled back by the
+InnoDB deadlock detector. Note that when a transaction is selected as
+a victim for rollback, InnoDB will always return an appropriate error
+code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
+@see DB_LOCK_WAIT_TIMEOUT
+@return transaction state */
+
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Release the resources of the transaction. If the transaction was
+selected as a victim by InnoDB and rolled back then use this function
+to free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_release(
+/*===========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Commit a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Rollback a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_rollback(
+/*============*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+ ib_id_u64_t table_id, /*!< in: table id of table to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+ ib_id_u64_t index_id, /*!< in: index id of index to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+ ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
+ const char* index_name, /*!< in: secondary index name */
+ ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
+ int* idx_type, /*!< out: index is cluster index */
+ ib_id_u64_t* idx_id); /*!< out: index id */
+
+/*****************************************************************//**
+Open an InnoDB table by name and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+ const char* name, /*!< in: table name */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Reset the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_reset(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+
+/*****************************************************************//**
+set a cursor trx to NULL*/
+
+void
+ib_cursor_clear_trx(
+/*================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+update the cursor with new transactions and also reset the cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Commit the transaction in a cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+
+void*
+ib_open_table_by_name(
+/*==================*/
+ const char* name); /*!< in: table name to lookup */
+
+/*****************************************************************//**
+Insert a row to a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
+ const ib_tpl_t ib_tpl); /*!< in: tuple to insert */
+
+/*****************************************************************//**
+Update a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
+ const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */
+
+/*****************************************************************//**
+Delete a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor instance */
+
+/*****************************************************************//**
+Read current row.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl); /*!< out: read cols into this tuple */
+
+/*****************************************************************//**
+Move cursor to the first record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_first(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the last record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_last(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the next record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_next(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Search for key.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl, /*!< in: Key to search for */
+ ib_srch_mode_t ib_srch_mode); /*!< in: search mode */
+
+/*****************************************************************//**
+Set the match mode for ib_cursor_moveto(). */
+
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: Cursor instance */
+ ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_col_set_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t col_no, /*!< in: column index in tuple */
+ const void* src, /*!< in: data value */
+ ib_ulint_t len); /*!< in: data value len */
+
+/*****************************************************************//**
+Get the size of the data available in the column of the tuple.
+@return bytes avail or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return bytes copied or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ void* dst, /*!< out: copied data value */
+ ib_ulint_t len); /*!< in: max data value len to copy */
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i64_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u64_t* ival); /*!< out: integer value */
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return NULL or pointer to buffer */
+
+const void*
+ib_col_get_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i); /*!< in: column number */
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_col_meta_t* ib_col_meta); /*!< out: column meta data */
+
+/*****************************************************************//**
+"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
+@return new tuple, or NULL */
+
+ib_tpl_t
+ib_tuple_clear(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */
+
+/*****************************************************************//**
+Create a new cluster key search tuple and copy the contents of the
+secondary index key tuple columns that refer to the cluster index record
+to the cluster key. It does a deep copy of the column data.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_get_cluster_key(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
+ ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Copy the contents of source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+ ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return number of user columns */
+
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return number of columns */
+
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+
+void
+ib_tuple_delete(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+ ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
+ to truncate */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Get a table id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_table_get_id(
+/*============*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id); /*!< out: table id if found */
+
+/*****************************************************************//**
+Get an index id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_index_get_id(
+/*============*/
+ const char* table_name, /*!< in: find index for this table */
+ const char* index_name, /*!< in: index to find */
+ ib_id_u64_t* index_id); /*!< out: index id if found */
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return IB_TRUE if positioned */
+
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+ const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode by a
+user transaction.
+@return TRUE if exclusive latch */
+
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+ const ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Lock an InnoDB cursor/table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Lock an InnoDB table using the table id.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_lock(
+/*===========*/
+ ib_trx_t ib_trx, /*!< in/out: transaction */
+ ib_id_u64_t table_id, /*!< in: table id */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set the lock mode of the cursor.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set need to access clustered index record flag. */
+
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i16(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u16(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u32(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+
+void
+ib_cursor_stmt_begin(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor */
+
+/*****************************************************************//**
+Write a double value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ int col_no, /*!< in: column number */
+ double val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ double* dval); /*!< out: double value */
+
+/*****************************************************************//**
+Write a float value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ float val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ float* fval); /*!< out: float value */
+
+/*****************************************************************//**
+Get the name of a column from the cursor's current index/table.
+@return name of the column */
+
+const char*
+ib_col_get_name(
+/*============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+
+const char*
+ib_get_idx_field_name(
+/*==================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_truncate(
+/*==============*/
+ const char* table_name, /*!< in: table name */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return DB_SUCCESS or error number */
+
+ib_err_t
+ib_close_thd(
+/*=========*/
+ void* thd); /*!< in: handle to the MySQL
+ thread of the user whose resources
+ should be free'd */
+
+/*****************************************************************//**
+Get the generic configuration status flags (IB_CFG_BINLOG_ENABLED,
+IB_CFG_MDL_ENABLED, IB_CFG_DISABLE_ROWLOCK OR-ed together).
+@return configuration status */
+
+int
+ib_cfg_get_cfg(void);
+/*============*/
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_table_name_check(
+/*================*/
+ const char* name); /*!< in: table name to check */
+
+/*****************************************************************//**
+Return the isolation level configured by "innodb_api_trx_level".
+NOTE(review): declared to return ib_trx_state_t although the value is
+semantically an isolation level (ib_trx_level_t) -- confirm with callers
+before changing the type.
+@return trx isolation level */
+
+ib_trx_state_t
+ib_cfg_trx_level(void);
+/*==============*/
+
+/*****************************************************************//**
+Return the configured background commit interval (in seconds).
+@return background commit interval (in seconds) */
+
+ib_ulint_t
+ib_cfg_bk_commit_interval(void);
+/*=======================*/
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+#endif /* api0api_h */
diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h
new file mode 100644
index 00000000000..fcd748390d1
--- /dev/null
+++ b/storage/innobase/include/api0misc.h
@@ -0,0 +1,78 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0misc.h
+InnoDB Native API
+
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+2008 Created by Sunny Bains
+*******************************************************/
+
+#ifndef api0misc_h
+#define api0misc_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "que0que.h"
+#include "trx0trx.h"
+
+/** Whether binlog is enabled for applications using InnoDB APIs */
+extern my_bool ib_binlog_enabled;
+
+/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
+extern my_bool ib_mdl_enabled;
+
+/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
+extern my_bool ib_disable_row_lock;
+
+/** configure value for transaction isolation level */
+extern ulong ib_trx_level_setting;
+
+/** configure value for background commit interval (in seconds) */
+extern ulong ib_bk_commit_interval;
+
+/********************************************************************
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+ dberr_t* new_err, /*!< out: possible new error
+ encountered in lock wait, or if
+ no new error, the value of
+ trx->error_state at the entry of this
+ function */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_savept_t* savept); /*!< in: savepoint or NULL */
+
+/*************************************************************************
+Sets a lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode); /*!< in: lock mode */
+
+#endif /* api0misc_h */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 5592995d4b2..b99b0c0cd7b 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -92,6 +93,17 @@ insert/delete buffer when the record is not in the buffer pool. */
buffer when the record is not in the buffer pool. */
#define BTR_DELETE 8192
+/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
+already holding an S latch on the index tree */
+#define BTR_ALREADY_S_LATCHED 16384
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & ~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED))
#endif /* UNIV_HOTBACKUP */
/**************************************************************//**
@@ -118,7 +130,7 @@ btr_corruption_report(
#ifdef UNIV_BLOB_DEBUG
# include "ut0rbt.h"
/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_struct
+struct btr_blob_dbg_t
{
unsigned blob_page_no:32; /*!< first BLOB page number */
unsigned ref_page_no:32; /*!< referring page number */
@@ -207,8 +219,32 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
@@ -269,7 +305,8 @@ UNIV_INLINE
index_id_t
btr_page_get_index_id(
/*==================*/
- const page_t* page); /*!< in: index page */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
@@ -278,16 +315,9 @@ UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
- const page_t* page); /*!< in: index page */
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
+#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
/********************************************************//**
Gets the next index page number.
@return next page number */
@@ -296,7 +326,8 @@ ulint
btr_page_get_next(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************//**
Gets the previous index page number.
@return prev page number */
@@ -305,7 +336,8 @@ ulint
btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -315,8 +347,9 @@ rec_t*
btr_get_prev_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -326,8 +359,9 @@ rec_t*
btr_get_next_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
@@ -337,7 +371,8 @@ btr_leaf_page_release(
buf_block_t* block, /*!< in: buffer block */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
@@ -350,7 +385,8 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
Creates the root node for a new index tree.
@return page number of the created root, FIL_NULL if did not succeed */
@@ -364,7 +400,8 @@ btr_create(
or 0 for uncompressed pages */
index_id_t index_id,/*!< in: index id */
dict_index_t* index, /*!< in: index */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull));
/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
@@ -386,7 +423,8 @@ btr_free_root(
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
@@ -398,13 +436,18 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
@@ -418,7 +461,8 @@ btr_page_reorganize(
/*================*/
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@@ -428,9 +472,10 @@ ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to right.
@@ -440,9 +485,10 @@ ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
@@ -456,12 +502,17 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
@@ -469,14 +520,16 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-# define btr_insert_on_non_leaf_level(i,l,t,m) \
- btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+# define btr_insert_on_non_leaf_level(f,i,l,t,m) \
+ btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Sets a record as the predefined minimum record. */
@@ -485,7 +538,8 @@ void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /*!< in/out: record */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
@@ -495,7 +549,8 @@ btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
@@ -506,7 +561,8 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
@@ -540,7 +596,8 @@ btr_discard_page(
/*=============*/
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
@@ -554,7 +611,8 @@ btr_parse_set_min_rec_mark(
byte* end_ptr,/*!< in: buffer end */
ulint comp, /*!< in: nonzero=compact page format */
page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
/***********************************************************//**
Parses a redo log record of reorganizing a page.
@return end of log record or NULL */
@@ -565,8 +623,10 @@ btr_parse_page_reorganize(
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2,3), warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Gets the number of pages in a B-tree.
@@ -612,7 +672,8 @@ btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages, because the page level 0 can be given as an
@@ -624,7 +685,8 @@ btr_page_free_low(
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
@@ -632,7 +694,8 @@ UNIV_INTERN
void
btr_print_size(
/*===========*/
- dict_index_t* index); /*!< in: index tree */
+ dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull));
/**************************************************************//**
Prints directories and other info of all nodes in the index. */
UNIV_INTERN
@@ -640,8 +703,9 @@ void
btr_print_index(
/*============*/
dict_index_t* index, /*!< in: index */
- ulint width); /*!< in: print this many entries from start
+ ulint width) /*!< in: print this many entries from start
and end */
+ __attribute__((nonnull));
#endif /* UNIV_BTR_PRINT */
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
@@ -653,18 +717,20 @@ btr_index_rec_validate(
/*===================*/
const rec_t* rec, /*!< in: index record */
const dict_index_t* index, /*!< in: index */
- ibool dump_on_error); /*!< in: TRUE if the function
+ ibool dump_on_error) /*!< in: TRUE if the function
should print hex dump of record
and page on error */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction or NULL */
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx) /*!< in: transaction or 0 */
+ __attribute__((nonnull(1), warn_unused_result));
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 6f7a66b12ac..00f50b5dcaf 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -126,22 +126,6 @@ btr_page_get_level_low(
}
/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- return(btr_page_get_level_low(page));
-}
-
-/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
@@ -278,6 +262,7 @@ btr_node_ptr_get_child_page_no(
" in a node ptr record at offset %lu\n",
(ulong) page_offset(rec));
buf_page_print(page_align(rec), 0, 0);
+ ut_ad(0);
}
return(page_no);
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index f437575579e..edba1d1d77f 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -31,14 +31,22 @@ Created 10/16/1994 Heikki Tuuri
#include "page0cur.h"
#include "btr0types.h"
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
-#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
- update vector or inserted entry */
-#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update()
- must keep cursor position when
- moving columns to big_rec */
+/** Mode flags for btr_cur operations; these can be ORed */
+enum {
+ /** do no undo logging */
+ BTR_NO_UNDO_LOG_FLAG = 1,
+ /** do no record lock checking */
+ BTR_NO_LOCKING_FLAG = 2,
+ /** sys fields will be found in the update vector or inserted
+ entry */
+ BTR_KEEP_SYS_FLAG = 4,
+ /** btr_cur_pessimistic_update() must keep cursor position
+ when moving columns to big_rec */
+ BTR_KEEP_POS_FLAG = 8,
+ /** the caller is creating the index or wants to bypass the
+ index->info.online creation log */
+ BTR_CREATE_FLAG = 16
+};
#ifndef UNIV_HOTBACKUP
#include "que0types.h"
@@ -164,16 +172,19 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_cur_open_at_index_side(f,i,l,c,m) \
- btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
+ btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
@@ -196,7 +207,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -204,6 +215,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -212,11 +225,12 @@ btr_cur_optimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr; if this function returns
+ mtr_t* mtr) /*!< in: mtr; if this function returns
DB_SUCCESS on a leaf page of a secondary
index in a compressed tablespace, the
mtr must be committed before latching
any further pages */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
@@ -224,7 +238,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -235,6 +249,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -243,7 +260,8 @@ btr_cur_pessimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
@@ -264,19 +282,23 @@ btr_cur_update_alloc_zip(
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,8)));
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
@@ -286,20 +308,25 @@ so that tree compression is recommended.
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,5,9)));
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -307,7 +334,7 @@ update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -315,7 +342,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -323,9 +356,12 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,5,6,7,11)));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -333,15 +369,13 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
__attribute__((nonnull));
@@ -349,7 +383,7 @@ btr_cur_del_mark_set_clust_rec(
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
@@ -382,16 +416,27 @@ but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
- mtr_t* mtr); /*!< in: mtr; if this function returns
+# ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+# endif /* UNIV_DEBUG */
+ mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
before latching any further pages */
+ __attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, flags, mtr)
+# else /* UNIV_DEBUG */
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, mtr)
+# endif /* UNIV_DEBUG */
/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
@@ -404,7 +449,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -417,8 +462,10 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@@ -472,9 +519,10 @@ btr_estimate_n_rows_in_range(
ulint mode2); /*!< in: search mode for range end */
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] and
-the number of pages that were sampled is saved in index->stat_n_sample_sizes[].
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
@@ -528,7 +576,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -662,8 +710,7 @@ limit, merging it to a neighbor is tried */
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
-typedef struct btr_path_struct btr_path_t;
-struct btr_path_struct{
+struct btr_path_t{
ulint nth_rec; /*!< index of the record
where the page cursor stopped on
this level (index in alphabetical
@@ -700,7 +747,7 @@ enum btr_cur_method {
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
-struct btr_cur_struct {
+struct btr_cur_t {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
@@ -737,7 +784,7 @@ struct btr_cur_struct {
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
- row_ins_duplicate_key.) */
+ row_ins_duplicate_error_in_clust.) */
ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
@@ -822,6 +869,11 @@ srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+extern uint btr_cur_limit_optimistic_insert_debug;
+#endif /* UNIV_DEBUG */
+
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index 540417e3062..080866c7465 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -27,6 +27,16 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0btr.h"
#ifdef UNIV_DEBUG
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
+if (btr_cur_limit_optimistic_insert_debug\
+ && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+ CODE;\
+}
+#else
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@return pointer to page cursor component */
@@ -135,6 +145,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor);
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ return(FALSE));
+
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index a8eaac4690b..973fae382ab 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -146,13 +146,16 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr); /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the up_match value for a pcur after a search.
@return number of matched fields at the cursor or to the right if
@@ -209,8 +212,17 @@ btr_pcur_open_at_rnd_pos_func(
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
-Frees the possible old_rec_buf buffer of a persistent cursor and sets the
-latch mode of the persistent cursor to BTR_NO_LATCHES. */
+Frees the possible memory heap of a persistent cursor and sets the latch
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
@@ -452,14 +464,14 @@ btr_pcur_move_to_prev_on_page(
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. */
-struct btr_pcur_struct{
+struct btr_pcur_t{
btr_cur_t btr_cur; /*!< a B-tree cursor */
ulint latch_mode; /*!< see TODO note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
- positioned; the last value means that
+ positioned; BTR_NO_LATCHES means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index a27033c4a7c..79afd7c322e 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -429,7 +429,7 @@ btr_pcur_open_low(
btr_pcur_init(cursor);
- cursor->latch_mode = latch_mode;
+ cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
cursor->search_mode = mode;
/* Search with the tree cursor */
@@ -496,28 +496,26 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr) /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- pcur->latch_mode = latch_mode;
+ pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- if (from_left) {
- pcur->search_mode = PAGE_CUR_G;
- } else {
- pcur->search_mode = PAGE_CUR_L;
- }
+ pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
- if (do_init) {
+ if (init_pcur) {
btr_pcur_init(pcur);
}
btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), mtr);
+ btr_pcur_get_btr_cur(pcur), level, mtr);
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
@@ -556,7 +554,16 @@ btr_pcur_open_at_rnd_pos_func(
/**************************************************************//**
Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES. */
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 5316c3efd39..fea117d0aaf 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -68,7 +68,8 @@ UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull));
/*****************************************************************//**
Creates and initializes a search info struct.
@return own: search info struct */
@@ -193,7 +194,7 @@ btr_search_validate(void);
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/** The search info struct in an index */
-struct btr_search_struct{
+struct btr_search_t{
ulint ref_count; /*!< Number of blocks in this index tree
that have search index built
i.e. block->index points to this index.
@@ -242,16 +243,13 @@ struct btr_search_struct{
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
-/** value of btr_search_struct::magic_n, used in assertions */
+/** value of btr_search_t::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
/** The hash index system */
-typedef struct btr_search_sys_struct btr_search_sys_t;
-
-/** The hash index system */
-struct btr_search_sys_struct{
+struct btr_search_sys_t{
hash_table_t* hash_index; /*!< the adaptive hash index,
mapping dtuple_fold values
to rec_t pointers on index pages */
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index 49ba0fd3f0b..0bd869be136 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -45,8 +45,6 @@ btr_search_get_info(
/*================*/
dict_index_t* index) /*!< in: index */
{
- ut_ad(index);
-
return(index->search_info);
}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 09f97b3cabd..c1a4531f861 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -33,11 +33,11 @@ Created 2/17/1996 Heikki Tuuri
#include "sync0rw.h"
/** Persistent cursor */
-typedef struct btr_pcur_struct btr_pcur_t;
+struct btr_pcur_t;
/** B-tree cursor */
-typedef struct btr_cur_struct btr_cur_t;
+struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
-typedef struct btr_search_struct btr_search_t;
+struct btr_search_t;
#ifndef UNIV_HOTBACKUP
@@ -68,7 +68,7 @@ extern char btr_search_enabled;
#ifdef UNIV_BLOB_DEBUG
# include "buf0types.h"
/** An index->blobs entry for keeping track of off-page column references */
-typedef struct btr_blob_dbg_struct btr_blob_dbg_t;
+struct btr_blob_dbg_t;
/** Insert to index->blobs a reference to an off-page column.
@param index the index tree
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 2284f21e3ab..74a6e203808 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -89,8 +89,6 @@ extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
prints info whenever read or flush
occurs */
#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
- issued */
extern ulint srv_buf_pool_instances;
extern ulint srv_buf_pool_curr_size;
#else /* !UNIV_HOTBACKUP */
@@ -102,7 +100,7 @@ extern buf_block_t* back_block2; /*!< second block, for page reorganize */
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
/** @brief States of a control block
-@see buf_page_struct
+@see buf_page_t
The enumeration values must be 0..7. */
enum buf_page_state {
@@ -132,7 +130,7 @@ enum buf_page_state {
/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
-struct buf_pool_info_struct{
+struct buf_pool_info_t{
/* General buffer pool info */
ulint pool_unique_id; /*!< Buffer Pool ID */
ulint pool_size; /*!< Buffer Pool size in pages */
@@ -203,7 +201,12 @@ struct buf_pool_info_struct{
interval */
};
-typedef struct buf_pool_info_struct buf_pool_info_t;
+/** The occupied bytes of lists in all buffer pools */
+struct buf_pools_list_size_t {
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
+ ulint flush_list_bytes; /*!< flush_list size in bytes */
+};
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
@@ -222,9 +225,9 @@ buf_pool_mutex_exit_all(void);
/********************************************************************//**
Creates the buffer pool.
-@return own: buf_pool object, NULL if not enough memory or error */
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
-ulint
+dberr_t
buf_pool_init(
/*=========*/
ulint size, /*!< in: Size of the total pool in bytes */
@@ -629,9 +632,12 @@ UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
+ bool check_lsn, /*!< in: true if we need to check the
+ and complain about the LSN */
const byte* read_buf, /*!< in: a database page */
- ulint zip_size); /*!< in: size of compressed page;
+ ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
@@ -881,7 +887,7 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
@@ -1010,8 +1016,7 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
@@ -1152,7 +1157,7 @@ UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
@@ -1164,9 +1169,9 @@ buf_page_init_for_read(
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
-@return TRUE if successful */
+@return true if successful */
UNIV_INTERN
-ibool
+bool
buf_page_io_complete(
/*=================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
@@ -1368,6 +1373,14 @@ buf_get_total_list_len(
ulint* free_len, /*!< out: length of all free lists */
ulint* flush_list_len);/*!< out: length of all flush lists */
/********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
+ in all buffer pools */
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -1385,6 +1398,16 @@ buf_get_nth_chunk_block(
ulint n, /*!< in: nth chunk in the buffer pool */
ulint* chunk_size); /*!< in: chunk size */
+/********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+ buf_frame_t* page, /*!< in/out: Page to update */
+ ulint zip_size, /*!< in: Compressed page size */
+ lsn_t lsn); /*!< in: Lsn to stamp on the page */
+
#endif /* !UNIV_HOTBACKUP */
/** The common buffer control block structure
@@ -1393,10 +1416,10 @@ for compressed and uncompressed frames */
/** Number of bits used for buffer page states. */
#define BUF_PAGE_STATE_BITS 3
-struct buf_page_struct{
+struct buf_page_t{
/** @name General fields
None of these bit-fields must be modified without holding
- buf_page_get_mutex() [buf_block_struct::mutex or
+ buf_page_get_mutex() [buf_block_t::mutex or
buf_pool->zip_mutex], since they can be stored in the same
machine word. Some of these fields are additionally protected
by buf_pool->mutex. */
@@ -1527,7 +1550,7 @@ struct buf_page_struct{
/* @} */
/** @name LRU replacement algorithm fields
These fields are protected by buf_pool->mutex only (not
- buf_pool->zip_mutex or buf_block_struct::mutex). */
+ buf_pool->zip_mutex or buf_block_t::mutex). */
/* @{ */
UT_LIST_NODE_T(buf_page_t) LRU;
@@ -1547,23 +1570,24 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
- unsigned access_time:32; /*!< time of first access, or
- 0 if the block was never accessed
- in the buffer pool */
/* @} */
+ unsigned access_time; /*!< time of first access, or
+ 0 if the block was never accessed
+ in the buffer pool. Protected by
+ block mutex */
# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
fsp frees a page in buffer pool;
protected by buf_pool->zip_mutex
- or buf_block_struct::mutex. */
+ or buf_block_t::mutex. */
# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
};
/** The buffer control block structure */
-struct buf_block_struct{
+struct buf_block_t{
/** @name General fields */
/* @{ */
@@ -1587,7 +1611,7 @@ struct buf_block_struct{
decompressed LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
- mutex_t mutex; /*!< mutex protecting this block:
+ ib_mutex_t mutex; /*!< mutex protecting this block:
state (also protected by the buffer
pool mutex), io_fix, buf_fix_count,
and accessed; we introduce this new
@@ -1646,8 +1670,8 @@ struct buf_block_struct{
/** @name Hash search fields
These 5 fields may only be modified when we have
an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_struct::lock or
- - we know that buf_block_struct::buf_fix_count == 0.
+ - we are holding an s-latch or x-latch on buf_block_t::lock or
+ - we know that buf_block_t::buf_fix_count == 0.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
@@ -1706,7 +1730,7 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @} */
/** @brief The buffer pool statistics structure. */
-struct buf_pool_stat_struct{
+struct buf_pool_stat_t{
ulint n_page_gets; /*!< number of page gets performed;
also successful searches through
the adaptive hash index are
@@ -1730,10 +1754,12 @@ struct buf_pool_stat_struct{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
-struct buf_buddy_stat_struct {
+struct buf_buddy_stat_t {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
@@ -1747,13 +1773,13 @@ struct buf_buddy_stat_struct {
NOTE! The definition appears here only for other modules of this
directory (buf) to see it. Do not use from outside! */
-struct buf_pool_struct{
+struct buf_pool_t{
/** @name General fields */
/* @{ */
- mutex_t mutex; /*!< Buffer pool mutex of this
+ ib_mutex_t mutex; /*!< Buffer pool mutex of this
instance */
- mutex_t zip_mutex; /*!< Zip mutex of this buffer
+ ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer
pool instance, protects compressed
only pages (of type buf_page_t, not
buf_block_t */
@@ -1807,7 +1833,7 @@ struct buf_pool_struct{
/* @{ */
- mutex_t flush_list_mutex;/*!< mutex protecting the
+ ib_mutex_t flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 88c29ab5603..b310efdf451 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -31,13 +31,13 @@ Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "mtr0mtr.h"
+#ifndef UNIV_HOTBACKUP
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
-#ifndef UNIV_HOTBACKUP
/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_struct{
+struct buf_chunk_t{
ulint mem_size; /*!< allocated size of the chunk */
ulint size; /*!< size of frames[] and blocks[] */
void* mem; /*!< pointer to the memory area which
@@ -339,7 +339,7 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
@@ -419,6 +419,8 @@ buf_page_get_io_fix(
/*================*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
{
+ ut_ad(bpage != NULL);
+
enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
#ifdef UNIV_DEBUG
switch (io_fix) {
@@ -614,18 +616,18 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
#endif
ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
/* Make this the time of the first access. */
- bpage->access_time = time_ms;
+ bpage->access_time = ut_time_ms();
}
}
@@ -942,7 +944,7 @@ buf_page_get_newest_modification(
page frame */
{
lsn_t lsn;
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index fcc56d91405..357ba697f6a 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -29,7 +29,6 @@ Created 2011/12/19 Inaam Rana
#include "univ.i"
#include "ut0byte.h"
#include "log0log.h"
-#include "buf0types.h"
#ifndef UNIV_HOTBACKUP
@@ -113,8 +112,8 @@ buf_dblwr_write_single_page(
buf_page_t* bpage); /*!< in: buffer block to write */
/** Doublewrite control struct */
-struct buf_dblwr_struct{
- mutex_t mutex; /*!< mutex protecting the first_free field and
+struct buf_dblwr_t{
+ ib_mutex_t mutex; /*!< mutex protecting the first_free field and
write_buf */
ulint block1; /*!< the page number of the first
doublewrite block (64 pages) */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index faf577f718b..94f4e6dedd1 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -95,23 +95,27 @@ void
buf_flush_sync_datafiles(void);
/*==========================*/
/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list of
+This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instance */
UNIV_INTERN
-ulint
+bool
buf_flush_list(
-/*============*/
+/*===========*/
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- lsn_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all
+ lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
+ ulint* n_processed); /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
/******************************************************************//**
This function picks up a single dirty page from the tail of the LRU
list, flushes it, removes it from page_hash and LRU list and puts
@@ -176,31 +180,6 @@ buf_flush_ready_for_replace(
/*========================*/
buf_page_t* bpage); /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
-
-/** @brief Statistics for selecting flush rate based on redo log
-generation speed.
-
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-
-struct buf_flush_stat_struct
-{
- lsn_t redo; /**< amount of redo generated. */
- ulint n_flushed; /**< number of pages flushed. */
-};
-
-/** Statistics for selecting flush rate of dirty pages. */
-typedef struct buf_flush_stat_struct buf_flush_stat_t;
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void);
-/*=======================*/
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one instance of this thread.
@@ -211,6 +190,23 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
/*==========================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_flush_LRU_tail(void);
+/*====================*/
+/*********************************************************************//**
+Wait for any possible LRU flushes that are in progress to end. */
+UNIV_INTERN
+void
+buf_flush_wait_LRU_batch_end(void);
+/*==============================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
@@ -238,6 +234,44 @@ UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
+
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
+held upon entering this function, and they will be released by this
+function. */
+UNIV_INTERN
+void
+buf_flush_page(
+/*===========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage, /*!< in: buffer control block */
+ buf_flush flush_type) /*!< in: type of flush */
+ __attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool */
+ ulint id); /*!< in: space id to check */
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return count of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+ ulint id); /*!< in: space id to check */
+#endif /* UNIV_DEBUG */
+
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index 68a76c0b637..a763cd115fe 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -26,6 +26,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
+#include "srv0srv.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
@@ -61,7 +62,7 @@ buf_flush_note_modification(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
@@ -91,7 +92,7 @@ buf_flush_note_modification(
mutex_exit(&block->mutex);
- ++srv_buf_pool_write_requests;
+ srv_stats.buf_pool_write_requests.inc();
}
/********************************************************************//**
@@ -108,7 +109,7 @@ buf_flush_recv_note_modification(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 527852da758..f7a69e1c9e4 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -31,6 +31,9 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
+// Forward declaration
+struct trx_t;
+
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
@@ -49,15 +52,19 @@ These are low-level functions
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
-what guarantees that it will not try to read in pages after this operation has
-completed? */
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
UNIV_INTERN
void
-buf_LRU_invalidate_tablespace(
+buf_LRU_flush_or_remove_pages(
/*==========================*/
- ulint id); /*!< in: space id */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx); /*!< to check if the operation must
+ be interrupted */
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
@@ -157,7 +164,10 @@ buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INTERN
void
buf_LRU_add_block(
@@ -270,15 +280,12 @@ extern uint buf_LRU_old_threshold_ms;
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
-struct buf_LRU_stat_struct
+struct buf_LRU_stat_t
{
ulint io; /**< Counter of buffer pool I/O operations. */
ulint unzip; /**< Counter of page_zip_decompress operations. */
};
-/** Statistics for selecting the LRU list for eviction. */
-typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
-
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index ba54a8aeeea..5ed210d3b90 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -27,19 +27,19 @@ Created 11/17/1995 Heikki Tuuri
#define buf0types_h
/** Buffer page (uncompressed or compressed) */
-typedef struct buf_page_struct buf_page_t;
+struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
-typedef struct buf_block_struct buf_block_t;
+struct buf_block_t;
/** Buffer pool chunk comprising buf_block_t */
-typedef struct buf_chunk_struct buf_chunk_t;
+struct buf_chunk_t;
/** Buffer pool comprising buf_chunk_t */
-typedef struct buf_pool_struct buf_pool_t;
+struct buf_pool_t;
/** Buffer pool statistics struct */
-typedef struct buf_pool_stat_struct buf_pool_stat_t;
+struct buf_pool_stat_t;
/** Buffer pool buddy statistics struct */
-typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
+struct buf_buddy_stat_t;
/** Doublewrite memory struct */
-typedef struct buf_dblwr_struct buf_dblwr_t;
+struct buf_dblwr_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
@@ -54,6 +54,17 @@ enum buf_flush {
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
+/** Algorithm to remove the pages for a tablespace from the buffer pool.
+See buf_LRU_flush_or_remove_pages(). */
+enum buf_remove_t {
+ BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
+ pool, don't write or sync to disk */
+ BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list,
+ don't write or sync to disk */
+	BUF_REMOVE_FLUSH_WRITE		/*!< Flush dirty pages to disk only,
+					don't remove from the buffer pool */
+};
+
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */
@@ -66,7 +77,7 @@ enum buf_io_fix {
/** Alternatives for srv_checksum_algorithm, which can be changed by
setting innodb_checksum_algorithm */
-enum srv_checksum_algorithm_enum {
+enum srv_checksum_algorithm_t {
SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32,
innodb or none when reading */
SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32
@@ -81,8 +92,6 @@ enum srv_checksum_algorithm_enum {
when reading */
};
-typedef enum srv_checksum_algorithm_enum srv_checksum_algorithm_t;
-
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
/** Zip shift value for the smallest page size */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index 37364e891f5..a548c7b89b3 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -35,7 +35,7 @@ Created 5/30/1994 Heikki Tuuri
/** Storage for overflow data in a big record, that is, a clustered
index record which needs external storage of data fields */
-typedef struct big_rec_struct big_rec_t;
+struct big_rec_t;
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -45,7 +45,8 @@ UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
- const dfield_t* field); /*!< in: SQL data field */
+ const dfield_t* field) /*!< in: SQL data field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets pointer to the data in a field.
@return pointer to data */
@@ -53,7 +54,8 @@ UNIV_INLINE
void*
dfield_get_data(
/*============*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dfield_get_type(field) (&(field)->type)
# define dfield_get_data(field) ((field)->data)
@@ -65,7 +67,8 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type); /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets length of field data.
@return length of data; UNIV_SQL_NULL if SQL null data */
@@ -73,7 +76,8 @@ UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
@@ -81,7 +85,8 @@ void
dfield_set_len(
/*===========*/
dfield_t* field, /*!< in: field */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull));
/*********************************************************************//**
Determines if a field is SQL NULL
@return nonzero if SQL null data */
@@ -89,7 +94,8 @@ UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a field is externally stored
@return nonzero if externally stored */
@@ -97,14 +103,16 @@ UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the "external storage" flag */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
@@ -113,14 +121,16 @@ dfield_set_data(
/*============*/
dfield_t* field, /*!< in: field */
const void* data, /*!< in: data */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Sets a data field to SQL NULL. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
@@ -128,7 +138,8 @@ void
data_write_sql_null(
/*================*/
byte* data, /*!< in: pointer to a buffer of size len */
- ulint len); /*!< in: SQL null size in bytes */
+ ulint len) /*!< in: SQL null size in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
@@ -136,7 +147,8 @@ void
dfield_copy_data(
/*=============*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
@@ -144,7 +156,8 @@ void
dfield_copy(
/*========*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data pointed to by a data field. */
UNIV_INLINE
@@ -152,7 +165,8 @@ void
dfield_dup(
/*=======*/
dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap); /*!< in: memory heap where allocated */
+ mem_heap_t* heap) /*!< in: memory heap where allocated */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Tests if two data fields are equal.
@@ -187,7 +201,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets nth field of a tuple.
@@ -208,7 +223,8 @@ UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
@@ -216,7 +232,8 @@ void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits); /*!< in: info bits */
+ ulint info_bits) /*!< in: info bits */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets number of fields used in record comparisons.
@return number of fields used in comparisons in rem0cmp.* */
@@ -224,7 +241,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets number of fields used in record comparisons. */
UNIV_INLINE
@@ -232,8 +250,9 @@ void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp); /*!< in: number of fields used in
+ ulint n_fields_cmp) /*!< in: number of fields used in
comparisons in rem0cmp.* */
+ __attribute__((nonnull));
/* Estimate the number of bytes that are going to be allocated when
creating a new dtuple_t object */
@@ -252,7 +271,8 @@ dtuple_create_from_mem(
/*===================*/
void* buf, /*!< in, out: buffer to use */
ulint buf_size, /*!< in: buffer size */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
@@ -265,19 +285,8 @@ dtuple_create(
mem_heap_t* heap, /*!< in: memory heap where the tuple
is created, DTUPLE_EST_ALLOC(n_fields)
bytes will be allocated from this heap */
- ulint n_fields); /*!< in: number of fields */
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields)/*!< in: number of fields */
+ __attribute__((nonnull, malloc));
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
@@ -287,7 +296,8 @@ void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
@@ -297,8 +307,9 @@ dtuple_t*
dtuple_copy(
/*========*/
const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap); /*!< in: memory heap
+ mem_heap_t* heap) /*!< in: memory heap
where the tuple is created */
+ __attribute__((nonnull, malloc));
/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted.
@@ -308,7 +319,8 @@ ulint
dtuple_get_data_size(
/*=================*/
const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull));
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
@return number of fields */
@@ -316,7 +328,8 @@ UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/************************************************************//**
Compare two data tuples, respecting the collation of character fields.
@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
@@ -326,7 +339,8 @@ int
dtuple_coll_cmp(
/*============*/
const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2);/*!< in: tuple 2 */
+ const dtuple_t* tuple2) /*!< in: tuple 2 */
+ __attribute__((nonnull, warn_unused_result));
/************************************************************//**
Folds a prefix given as the number of fields of a tuple.
@return the folded value */
@@ -339,7 +353,7 @@ dtuple_fold(
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
index_id_t tree_id)/*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
@@ -347,7 +361,8 @@ void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /*!< in: data tuple */
- ulint n); /*!< in: number of fields to set */
+ ulint n) /*!< in: number of fields to set */
+ __attribute__((nonnull));
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
@return TRUE if some field is SQL null */
@@ -355,7 +370,8 @@ UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
- const dtuple_t* tuple); /*!< in: dtuple */
+ const dtuple_t* tuple) /*!< in: dtuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
@return TRUE if ok */
@@ -363,7 +379,8 @@ UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
- const dfield_t* field); /*!< in: data field */
+ const dfield_t* field) /*!< in: data field */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
@return TRUE if ok */
@@ -371,7 +388,8 @@ UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed.
@return TRUE if ok */
@@ -379,7 +397,8 @@ UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
@@ -389,7 +408,8 @@ UNIV_INTERN
ibool
dtuple_validate(
/*============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
@@ -397,7 +417,8 @@ UNIV_INTERN
void
dfield_print(
/*=========*/
- const dfield_t* dfield);/*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
@@ -405,7 +426,8 @@ UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
- const dfield_t* dfield); /*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
@@ -413,7 +435,8 @@ void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
@@ -428,8 +451,9 @@ dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext); /*!< in/out: number of
+ ulint* n_ext) /*!< in/out: number of
externally stored columns */
+ __attribute__((nonnull, malloc, warn_unused_result));
/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
@@ -440,21 +464,23 @@ dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: entry whose data was put to vector */
- big_rec_t* vector);/*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/**************************************************************//**
Frees the memory in a big rec vector. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
- big_rec_t* vector); /*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/*######################################################################*/
/** Structure for an SQL data field */
-struct dfield_struct{
+struct dfield_t{
void* data; /*!< pointer to data */
unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
@@ -462,7 +488,7 @@ struct dfield_struct{
};
/** Structure for an SQL data tuple of fields (logical record) */
-struct dtuple_struct {
+struct dtuple_t {
ulint info_bits; /*!< info bits of an index record:
the default is 0; this field is used
if an index record is built from
@@ -482,15 +508,13 @@ struct dtuple_struct {
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number, used in
debug assertions */
-/** Value of dtuple_struct::magic_n */
+/** Value of dtuple_t::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
};
/** A slot for a field in a big rec vector */
-typedef struct big_rec_field_struct big_rec_field_t;
-/** A slot for a field in a big rec vector */
-struct big_rec_field_struct {
+struct big_rec_field_t {
ulint field_no; /*!< field number in record */
ulint len; /*!< stored data length, in bytes */
const void* data; /*!< stored data */
@@ -498,7 +522,7 @@ struct big_rec_field_struct {
/** Storage format for overflow data in a big record, that is, a
clustered index record which needs external storage of data fields */
-struct big_rec_struct {
+struct big_rec_t {
mem_heap_t* heap; /*!< memory heap from which
allocated */
ulint n_fields; /*!< number of stored fields */
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index da50e91e98d..6937d55d211 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -54,7 +54,7 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type) /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
{
ut_ad(field && type);
@@ -407,6 +407,8 @@ dtuple_create_from_mem(
}
}
#endif
+ UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
+ UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
return(tuple);
}
@@ -434,30 +436,6 @@ dtuple_create(
tuple = dtuple_create_from_mem(buf, buf_size, n_fields);
-#ifdef UNIV_DEBUG
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
-#endif
-
- return(tuple);
-}
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields) /*!< in: number of fields */
-{
- tuple->info_bits = 0;
- tuple->n_fields = tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*) fields;
- ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
-
return(tuple);
}
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index c7fcf316f24..111664b0b52 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,20 +33,20 @@ extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
-typedef struct dtype_struct dtype_t;
+struct dtype_t;
/* SQL Like operator comparison types */
-enum ib_like_enum {
+enum ib_like_t {
IB_LIKE_EXACT, /* e.g. STRING */
IB_LIKE_PREFIX, /* e.g., STRING% */
IB_LIKE_SUFFIX, /* e.g., %STRING */
IB_LIKE_SUBSTR, /* e.g., %STRING% */
IB_LIKE_REGEXP /* Future */
};
-typedef enum ib_like_enum ib_like_t;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
+#define DATA_MISSING 0 /* missing column */
#define DATA_VARCHAR 1 /* character varying of the
latin1_swedish_ci charset-collation; note
that the MySQL format for this, DATA_BINARY,
@@ -508,7 +508,7 @@ dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
-struct dtype_struct{
+struct dtype_t{
unsigned prtype:32; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index a5e94a8edff..d489bef89a8 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -556,35 +556,18 @@ dtype_get_fixed_size_low(
} else if (!comp) {
return(len);
} else {
- /* We play it safe here and ask MySQL for
- mbminlen and mbmaxlen. Although
- mbminlen and mbmaxlen are
- initialized if and only if prtype
- is (in one of the 3 functions in this file),
- it could be that none of these functions
- has been called. */
-
+#ifdef UNIV_DEBUG
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
- if (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
- != mbminmaxlen) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "mbminlen=%lu, "
- "mbmaxlen=%lu, "
- "type->mbminlen=%lu, "
- "type->mbmaxlen=%lu\n",
- (ulong) i_mbminlen,
- (ulong) i_mbmaxlen,
- (ulong) DATA_MBMINLEN(mbminmaxlen),
- (ulong) DATA_MBMAXLEN(mbminmaxlen));
- }
- if (i_mbminlen == i_mbmaxlen) {
+ ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+ == mbminmaxlen);
+#endif /* UNIV_DEBUG */
+ if (DATA_MBMINLEN(mbminmaxlen)
+ == DATA_MBMAXLEN(mbminmaxlen)) {
return(len);
}
}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
index 7d599ef2c8d..bd2bb577611 100644
--- a/storage/innobase/include/data0types.h
+++ b/storage/innobase/include/data0types.h
@@ -27,10 +27,10 @@ Created 9/21/2000 Heikki Tuuri
#define data0types_h
/* SQL data field struct */
-typedef struct dfield_struct dfield_t;
+struct dfield_t;
/* SQL data tuple struct */
-typedef struct dtuple_struct dtuple_t;
+struct dtuple_t;
#endif
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 1a3499b09e0..12e9f543e94 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 5/24/1996 Heikki Tuuri
#define db0err_h
-enum db_err {
+enum dberr_t {
DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
explicit record lock was created */
DB_SUCCESS = 10,
@@ -68,11 +68,14 @@ enum db_err {
from a table failed */
DB_NO_SAVEPOINT, /*!< no savepoint exists with the given
name */
- DB_TABLESPACE_ALREADY_EXISTS, /*!< we cannot create a new single-table
+ DB_TABLESPACE_EXISTS, /*!< we cannot create a new single-table
tablespace because a file of the same
name already exists */
- DB_TABLESPACE_DELETED, /*!< tablespace does not exist or is
+ DB_TABLESPACE_DELETED, /*!< tablespace was deleted or is
being dropped right now */
+	DB_TABLESPACE_NOT_FOUND,	/*!< Attempt to delete a tablespace
+					instance that was not found in the
+					tablespace hash table */
DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
@@ -90,8 +93,8 @@ enum db_err {
work with e.g., FT indexes created by
a later version of the engine. */
- DB_PRIMARY_KEY_IS_NULL, /*!< a column in the PRIMARY KEY
- was found to be NULL */
+ DB_INVALID_NULL, /*!< a NOT NULL column was found to
+ be NULL during table rebuild */
DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the
persistent storage, used for recording
@@ -115,6 +118,12 @@ enum db_err {
DB_READ_ONLY, /*!< Update operation attempted in
a read-only transaction */
DB_FTS_INVALID_DOCID, /* FTS Doc ID cannot be zero */
+ DB_TABLE_IN_FK_CHECK, /* table is being used in foreign
+ key check */
+ DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big
+ during online index creation */
+
+ DB_IO_ERROR, /*!< Generic IO error */
/* The following are partial failure codes */
DB_FAIL = 1000,
@@ -123,7 +132,23 @@ enum db_err {
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
+ DB_END_OF_INDEX,
+ DB_DICT_CHANGED, /*!< Some part of table dictionary has
+ changed. Such as index dropped or
+ foreign key dropped */
+
+
+ /* The following are API only error codes. */
+ DB_DATA_MISMATCH = 2000, /*!< Column update or read failed
+ because the types mismatch */
+
+ DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the
+ schema to be locked in exclusive mode
+ and if it's not then that API function
+ will return this error code */
+
+ DB_NOT_FOUND /*!< Generic error code for "Not found"
+ type of errors */
};
#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index 364aa746638..a994c9d8ff1 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,6 +58,13 @@ dict_hdr_get_new_id(
ulint* space_id); /*!< out: space id
(not assigned if NULL) */
/**********************************************************************//**
+Writes the current value of the row id counter to the dictionary header file
+page. */
+UNIV_INTERN
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -82,18 +89,32 @@ dict_sys_write_row_id(
row_id_t row_id);/*!< in: row id */
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_boot(void);
+dberr_t
+dict_boot(void)
/*===========*/
+ __attribute__((warn_unused_result));
+
/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_create(void);
+dberr_t
+dict_create(void)
/*=============*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Check if a table id belongs to system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+ table_id_t id) /*!< in: table id to check */
+ __attribute__((warn_unused_result));
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -273,6 +294,41 @@ enum dict_fld_sys_foreign_cols_enum {
DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5,
DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6
};
+/* The columns in SYS_TABLESPACES */
+enum dict_col_sys_tablespaces_enum {
+ DICT_COL__SYS_TABLESPACES__SPACE = 0,
+ DICT_COL__SYS_TABLESPACES__NAME = 1,
+ DICT_COL__SYS_TABLESPACES__FLAGS = 2,
+ DICT_NUM_COLS__SYS_TABLESPACES = 3
+};
+/* The field numbers in the SYS_TABLESPACES clustered index */
+enum dict_fld_sys_tablespaces_enum {
+ DICT_FLD__SYS_TABLESPACES__SPACE = 0,
+ DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_TABLESPACES__NAME = 3,
+ DICT_FLD__SYS_TABLESPACES__FLAGS = 4,
+ DICT_NUM_FIELDS__SYS_TABLESPACES = 5
+};
+/* The columns in SYS_DATAFILES */
+enum dict_col_sys_datafiles_enum {
+ DICT_COL__SYS_DATAFILES__SPACE = 0,
+ DICT_COL__SYS_DATAFILES__PATH = 1,
+ DICT_NUM_COLS__SYS_DATAFILES = 2
+};
+/* The field numbers in the SYS_DATAFILES clustered index */
+enum dict_fld_sys_datafiles_enum {
+ DICT_FLD__SYS_DATAFILES__SPACE = 0,
+ DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_DATAFILES__PATH = 3,
+ DICT_NUM_FIELDS__SYS_DATAFILES = 4
+};
+
+/* A number of the columns above occur in multiple tables. These are the
+lengths of those fields. */
+#define DICT_FLD_LEN_SPACE 4
+#define DICT_FLD_LEN_FLAGS 4
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index 0f660ab7555..2b156a4f672 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,15 +24,6 @@ Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-
-
-/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -90,4 +81,16 @@ dict_sys_write_row_id(
mach_write_to_6(field, row_id);
}
+/*********************************************************************//**
+Check if a table id belongs to system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+ table_id_t id) /*!< in: table id to check */
+{
+ return(id < DICT_HDR_FIRST_ID);
+}
+
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 68fc9ba195a..217da0142ee 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,7 +42,9 @@ tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/*********************************************************************//**
Creates an index create graph.
@return own: index create node */
@@ -52,7 +54,9 @@ ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@return query thread to run next or NULL */
@@ -99,11 +103,11 @@ dict_drop_index_tree(
mtr_t* mtr); /*!< in: mtr having the latch on the record page */
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
+at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
@@ -115,7 +119,7 @@ given locally for this table, that is, the number is not global, as in the
old format constraints < 4.0.18 it used to be.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
@@ -127,11 +131,56 @@ dict_create_add_foreigns_to_dictionary(
so far has no constraints for which the name
was generated here */
dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void);
+/*=====================================*/
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+ ulint space, /*!< in: tablespace id */
+ const char* name, /*!< in: tablespace name */
+ ulint flags, /*!< in: tablespace flags */
+ const char* path, /*!< in: tablespace path */
+ trx_t* trx, /*!< in: transaction */
+ bool commit); /*!< in: if true then commit the
+ transaction */
+/********************************************************************//**
+Table create node structure */
-/* Table create node structure */
+/********************************************************************//**
+Add a single foreign key definition to the data dictionary tables in the
+database. We also generate names to constraints that were not named by the
+user. A generated constraint has a name of the format
+databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
+are given locally for this table, that is, the number is not global, as in
+the old format constraints < 4.0.18 it used to be.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_foreign_to_dictionary(
+/*==================================*/
+ ulint* id_nr, /*!< in/out: number to use in id generation;
+ incremented if used */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t* foreign,/*!< in: foreign */
+ trx_t* trx) /*!< in/out: dictionary transaction */
+ __attribute__((nonnull, warn_unused_result));
-struct tab_node_struct{
+/* Table create node structure */
+struct tab_node_t{
que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /*!< table to create, built as a memory data
structure with dict_mem_... functions */
@@ -160,7 +209,7 @@ struct tab_node_struct{
/* Index create node struct */
-struct ind_node_struct{
+struct ind_node_t{
que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /*!< index to create, built as a memory data
structure with dict_mem_... functions */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 68008f95c2f..af0a5b31cc4 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +41,7 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0rnd.h"
#include "ut0byte.h"
#include "trx0types.h"
+#include "row0types.h"
#ifndef UNIV_HOTBACKUP
# include "sync0sync.h"
@@ -50,7 +52,8 @@ UNIV_INTERN
void
dict_casedn_str(
/*============*/
- char* a); /*!< in/out: string to put in lower case */
+ char* a) /*!< in/out: string to put in lower case */
+ __attribute__((nonnull));
/********************************************************************//**
Get the database name length in a table name.
@return database name length */
@@ -58,17 +61,53 @@ UNIV_INTERN
ulint
dict_get_db_name_len(
/*=================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len,/*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap); /*!< in: heap memory */
+/*********************************************************************//**
+Frees a foreign key struct. */
+UNIV_INTERN
+void
+dict_foreign_free(
+/*==============*/
+ dict_foreign_t* foreign); /*!< in, own: foreign key struct */
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
+only at the >= 4.0.18-format id's, which are of the form
+databasename/tablename_ibfk_[number].
+@return highest number, 0 if table has no new format foreign key constraints */
+UNIV_INTERN
+ulint
+dict_table_get_highest_foreign_id(
+/*==============================*/
+ dict_table_t* table); /*!< in: table in the dictionary
+ memory cache */
/********************************************************************//**
Return the end of table name where we have removed dbname and '/'.
@return table name */
-
+UNIV_INTERN
const char*
dict_remove_db_name(
/*================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Returns a table object based on table id.
@return table, NULL if does not exist */
@@ -77,7 +116,11 @@ dict_table_t*
dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ __attribute__((warn_unused_result));
/********************************************************************//**
Decrements the count of open handles to a table. */
UNIV_INTERN
@@ -85,7 +128,11 @@ void
dict_table_close(
/*=============*/
dict_table_t* table, /*!< in/out: table */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ __attribute__((nonnull));
/**********************************************************************//**
Inits the data dictionary module. */
UNIV_INTERN
@@ -109,7 +156,8 @@ UNIV_INLINE
ulint
dict_col_get_mbminlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the maximum number of bytes per character.
@return maximum multi-byte char size, in bytes */
@@ -117,7 +165,8 @@ UNIV_INLINE
ulint
dict_col_get_mbmaxlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the minimum and maximum number of bytes per character. */
UNIV_INLINE
@@ -127,8 +176,9 @@ dict_col_set_mbminmaxlen(
dict_col_t* col, /*!< in/out: column */
ulint mbminlen, /*!< in: minimum multi-byte
character size, in bytes */
- ulint mbmaxlen); /*!< in: minimum multi-byte
+ ulint mbmaxlen) /*!< in: minimum multi-byte
character size, in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
@@ -136,7 +186,8 @@ void
dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
- dtype_t* type); /*!< out: data type */
+ dtype_t* type) /*!< out: data type */
+ __attribute__((nonnull));
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
@@ -147,9 +198,9 @@ ulint
dict_max_field_len_store_undo(
/*==========================*/
dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column which index prefix
+ const dict_col_t* col) /*!< in: column which index prefix
is based on */
-
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -160,7 +211,8 @@ ibool
dict_col_type_assert_equal(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- const dtype_t* type); /*!< in: data type */
+ const dtype_t* type) /*!< in: data type */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
@@ -170,7 +222,8 @@ UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the maximum size of the column.
@return maximum size */
@@ -178,7 +231,8 @@ UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the size of a fixed size column, 0 if not a fixed size column.
@return fixed size, or 0 */
@@ -187,7 +241,8 @@ ulint
dict_col_get_fixed_size(
/*====================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
For fixed length types it is the fixed length of the type, otherwise 0.
@@ -197,8 +252,8 @@ ulint
dict_col_get_sql_null_size(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column number.
@return col->ind, table column position (starting from 0) */
@@ -206,7 +261,8 @@ UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column position in the clustered index. */
UNIV_INLINE
@@ -214,7 +270,8 @@ ulint
dict_col_get_clust_pos(
/*===================*/
const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index); /*!< in: clustered index */
+ const dict_index_t* clust_index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
TRUE.
@@ -223,14 +280,16 @@ UNIV_INTERN
ibool
dict_col_name_is_reserved(
/*======================*/
- const char* name); /*!< in: column name */
+ const char* name) /*!< in: column name */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Acquire the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/********************************************************************//**
Unconditionally set the autoinc counter. */
UNIV_INTERN
@@ -238,7 +297,8 @@ void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: next value to assign to a row */
+ ib_uint64_t value) /*!< in: next value to assign to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized.
@@ -247,7 +307,8 @@ UNIV_INTERN
ib_uint64_t
dict_table_autoinc_read(
/*====================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Updates the autoinc counter if the value supplied is greater than the
current value. */
@@ -257,14 +318,16 @@ dict_table_autoinc_update_if_greater(
/*=================================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: value which was assigned to a row */
+ ib_uint64_t value) /*!< in: value which was assigned to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Release the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Adds system columns to a table object. */
@@ -273,7 +336,8 @@ void
dict_table_add_system_columns(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap); /*!< in: temporary heap */
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Adds a table object to the dictionary cache. */
@@ -283,26 +347,30 @@ dict_table_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table */
ibool can_be_evicted, /*!< in: TRUE if can be evicted*/
- mem_heap_t* heap); /*!< in: temporary heap */
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
- dict_table_t* table); /*!< in, own: table */
+ dict_table_t* table) /*!< in, own: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Renames a table object.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns);/*!< in: in ALTER TABLE we want
+ ibool rename_also_foreigns)
+ /*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -310,7 +378,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
@@ -319,7 +388,16 @@ void
dict_table_change_id_in_cache(
/*==========================*/
dict_table_t* table, /*!< in/out: table object already in cache */
- table_id_t new_id);/*!< in: new id to set */
+ table_id_t new_id) /*!< in: new id to set */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Removes a foreign constraint struct from the dictionary cache. */
+UNIV_INTERN
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+ dict_foreign_t* foreign) /*!< in, own: foreign constraint */
+ __attribute__((nonnull));
/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
@@ -327,12 +405,13 @@ At least one of foreign table or referenced table must already be in
the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_add_to_cache(
/*======================*/
dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets);/*!< in: TRUE=check charset
+ ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Check if the index is referenced by a foreign key, if TRUE return the
matching instance NULL otherwise.
@@ -343,7 +422,8 @@ dict_foreign_t*
dict_table_get_referenced_constraint(
/*=================================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a table is referenced by foreign keys.
@return TRUE if table is referenced by a foreign key */
@@ -351,17 +431,19 @@ UNIV_INTERN
ibool
dict_table_is_referenced_by_foreign_key(
/*====================================*/
- const dict_table_t* table); /*!< in: InnoDB table */
+ const dict_table_t* table) /*!< in: InnoDB table */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Replace the index in the foreign key list that matches this index's
-definition with an equivalent index. */
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table. */
UNIV_INTERN
void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx); /*!< in: transaction handle */
+dict_foreign_replace_index(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ const dict_index_t* index, /*!< in: index to be replaced */
+ const trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull));
/**********************************************************************//**
Determines whether a string starts with the specified keyword.
@return TRUE if str starts with keyword */
@@ -369,9 +451,10 @@ UNIV_INTERN
ibool
dict_str_starts_with_keyword(
/*=========================*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
const char* str, /*!< in: string to scan for keyword */
- const char* keyword); /*!< in: keyword to look for */
+ const char* keyword) /*!< in: keyword to look for */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a index is defined for a foreign key constraint. Index is a part
of a foreign key constraint if the index is referenced by foreign key
@@ -383,7 +466,8 @@ dict_foreign_t*
dict_table_get_foreign_constraint(
/*==============================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -393,7 +477,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_foreign_constraints(
/*============================*/
trx_t* trx, /*!< in: transaction */
@@ -409,15 +493,16 @@ dict_create_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
mem_heap_t* heap, /*!< in: heap from which we can
@@ -426,8 +511,9 @@ dict_foreign_parse_drop_constraints(
dict_table_t* table, /*!< in: table */
ulint* n, /*!< out: number of constraints
to drop */
- const char*** constraints_to_drop); /*!< out: id's of the
+ const char*** constraints_to_drop) /*!< out: id's of the
constraints to drop */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Returns a table object and increments its open handle count.
NOTE! This is a high-level function to be used mainly from outside the
@@ -439,43 +525,40 @@ dict_table_t*
dict_table_open_on_name(
/*====================*/
const char* table_name, /*!< in: table name */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count. Table
-statistics will not be updated if they are not initialized.
-Call this function when dropping a table.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name_no_stats(
-/*=============================*/
- const char* table_name, /*!< in: table name */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
dict_err_ignore_t
- ignore_err); /*!< in: error to be ignored when
+ ignore_err) /*!< in: error to be ignored when
loading the table */
-/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign);/*!< in: foreign key */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Tries to find an index whose first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
@return matching index, NULL if not found */
UNIV_INTERN
dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols);/*!< in: number of columns */
+dict_foreign_find_index(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
/**********************************************************************//**
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
@@ -485,29 +568,16 @@ const char*
dict_table_get_col_name(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint col_nr);/*!< in: column number */
-
+ ulint col_nr) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Prints a table definition. */
+Prints a table data. */
UNIV_INTERN
void
dict_table_print(
/*=============*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name); /*!< in: table name */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on foreign keys of a table. */
UNIV_INTERN
@@ -520,7 +590,8 @@ dict_print_info_on_foreign_keys(
of SHOW TABLE STATUS */
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
@@ -531,7 +602,8 @@ dict_print_info_on_foreign_key_in_create_format(
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign, /*!< in: foreign key constraint */
- ibool add_newline); /*!< in: whether to add a newline */
+ ibool add_newline) /*!< in: whether to add a newline */
+ __attribute__((nonnull(1,3)));
/********************************************************************//**
Displays the names of the index and the table. */
UNIV_INTERN
@@ -539,8 +611,35 @@ void
dict_index_name_print(
/*==================*/
FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to print */
+ const trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index) /*!< in: index to print */
+ __attribute__((nonnull(1,3)));
+/*********************************************************************//**
+Checks if the index qualifies as a candidate for a foreign key: its first
+fields are the given columns, in the same order, it is not marked for
+deletion, and it is not the same index as types_idx.
+@return true if the index qualifies, false otherwise */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*==========================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
@@ -549,7 +648,17 @@ UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the last index on the table.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
@@ -557,9 +666,11 @@ UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
@@ -605,15 +716,6 @@ dict_index_is_ibuf(
const dict_index_t* index) /*!< in: index */
__attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
-Check whether the index is an universal index tree.
-@return nonzero for universal tree, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_univ(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull, pure, warn_unused_result));
-/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
@@ -626,13 +728,14 @@ dict_index_is_sec_or_ibuf(
/************************************************************************
Gets the all the FTS indexes for the table. NOTE: must not be called for
tables which do not have an FTS-index. */
-
+UNIV_INTERN
ulint
dict_table_get_all_fts_indexes(
/*===========================*/
/* out: number of indexes collected */
dict_table_t* table, /* in: table */
- ib_vector_t* indexes);/* out: vector for collecting FTS indexes */
+ ib_vector_t* indexes)/* out: vector for collecting FTS indexes */
+ __attribute__((nonnull));
/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
cache.
@@ -662,6 +765,35 @@ dict_table_get_n_cols(
/*==================*/
const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -671,7 +803,8 @@ dict_col_t*
dict_table_get_nth_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint pos); /*!< in: position of column */
+ ulint pos) /*!< in: position of column */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the given system column of a table.
@return pointer to column object */
@@ -680,7 +813,8 @@ dict_col_t*
dict_table_get_sys_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
#define dict_table_get_nth_col(table, pos) \
((table)->cols + (pos))
@@ -695,7 +829,8 @@ ulint
dict_table_get_sys_col_no(
/*======================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns the minimum data size of an index record.
@@ -704,7 +839,8 @@ UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Check whether the table uses the compact page format.
@@ -713,7 +849,8 @@ UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format of a table.
@return file format version */
@@ -721,7 +858,8 @@ UNIV_INLINE
ulint
dict_table_get_format(
/*==================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format from a dict_table_t::flags.
@return file format version */
@@ -729,7 +867,8 @@ UNIV_INLINE
ulint
dict_tf_get_format(
/*===============*/
- ulint flags); /*!< in: dict_table_t::flags */
+ ulint flags) /*!< in: dict_table_t::flags */
+ __attribute__((warn_unused_result));
/********************************************************************//**
Set the various values in a dict_table_t::flags pointer. */
UNIV_INLINE
@@ -738,7 +877,9 @@ dict_tf_set(
/*========*/
ulint* flags, /*!< in/out: table */
rec_format_t format, /*!< in: file format */
- ulint zip_ssize); /*!< in: zip shift size */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool remote_path) /*!< in: table uses DATA DIRECTORY */
+ __attribute__((nonnull));
/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
written into the tablespace header at the offset FSP_SPACE_FLAGS and is
@@ -756,13 +897,6 @@ dict_tf_to_fsp_flags(
/*=================*/
ulint flags) /*!< in: dict_table_t::flags */
__attribute__((const));
-/********************************************************************/
-UNIV_INLINE
-ulint
-dict_tf_to_sys_tables_type(
-/*=======================*/
- ulint flags) /*!< in: dict_table_t::flags */
- __attribute__((const));
/********************************************************************//**
Extract the compressed page size from table flags.
@return compressed page size, or 0 if not compressed */
@@ -779,7 +913,8 @@ UNIV_INLINE
ulint
dict_table_zip_size(
/*================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -789,15 +924,16 @@ UNIV_INLINE
void
dict_table_x_lock_indexes(
/*======================*/
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/*********************************************************************//**
Release the exclusive locks on all index tree. */
UNIV_INLINE
void
dict_table_x_unlock_indexes(
/*========================*/
- dict_table_t* table); /*!< in: table */
-#endif /* !UNIV_HOTBACKUP */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns.
@@ -807,7 +943,8 @@ ibool
dict_table_col_in_clustered_key(
/*============================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Check if the table has an FTS index.
@return TRUE if table has an FTS index */
@@ -815,36 +952,8 @@ UNIV_INLINE
ibool
dict_table_has_fts_index(
/*=====================*/
- dict_table_t* table); /*!< in: table */
-/*******************************************************************//**
-Validate and return the table flags.
-@return Same as input after validating it as dict_table_t::flags.
-If there is an error, trigger assertion failure. */
-UNIV_INLINE
-ulint
-dict_tf_validate(
-/*=============*/
- ulint flags); /*!< in: table flags */
-/********************************************************************//**
-Validate a SYS_TABLES TYPE field and return it.
-@return Same as input after validating it as a SYS_TABLES TYPE field.
-If there is an error, return ULINT_UNDEFINED. */
-UNIV_INLINE
-ulint
-dict_sys_tables_type_validate(
-/*==========================*/
- ulint type, /*!< in: SYS_TABLES.TYPE */
- ulint n_cols); /*!< in: SYS_TABLES.N_COLS */
-/********************************************************************//**
-Determine the file format from dict_table_t::flags
-The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
-other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
-@return file format version */
-UNIV_INLINE
-rec_format_t
-dict_tf_get_rec_format(
-/*===================*/
- ulint flags); /*!< in: dict_table_t::flags */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Copies types of columns contained in table to tuple and sets all
fields of the tuple to the SQL NULL value. This function should
@@ -854,18 +963,20 @@ void
dict_table_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/********************************************************************
Wait until all the background threads of the given table have exited, i.e.,
bg_threads == 0. Note: bg_threads_mutex must be reserved when
calling this. */
-
+UNIV_INTERN
void
dict_table_wait_for_bg_threads_to_exit(
/*===================================*/
dict_table_t* table, /* in: table */
- ulint delay); /* in: time in microseconds to wait between
+ ulint delay) /* in: time in microseconds to wait between
checks of bg_threads. */
+ __attribute__((nonnull));
/**********************************************************************//**
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
@@ -875,7 +986,8 @@ UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- index_id_t id); /*!< in: index id */
+ index_id_t id) /*!< in: index id */
+ __attribute__((warn_unused_result));
/**********************************************************************//**
Make room in the table cache by evicting an unused table. The unused table
should not be part of FK relationship and currently not used in any user
@@ -891,16 +1003,17 @@ dict_make_room_in_cache(
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
-ulint
+dberr_t
dict_index_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table on which the index is */
dict_index_t* index, /*!< in, own: index; NOTE! The index memory
object is freed in this function! */
ulint page_no,/*!< in: root page number of the index */
- ibool strict);/*!< in: TRUE=refuse to create the index
+ ibool strict) /*!< in: TRUE=refuse to create the index
if records could be too big to fit in
an B-tree page */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -908,8 +1021,9 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
-
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
@@ -918,9 +1032,10 @@ UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
- const dict_index_t* index); /*!< in: an internal
+ const dict_index_t* index) /*!< in: an internal
representation of index (in
the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
@@ -931,8 +1046,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
@@ -942,8 +1058,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
@@ -954,8 +1071,9 @@ UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth field of an index.
@@ -965,7 +1083,8 @@ dict_field_t*
dict_index_get_nth_field(
/*=====================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of field */
+ ulint pos) /*!< in: position of field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
#endif /* UNIV_DEBUG */
@@ -977,7 +1096,8 @@ const dict_col_t*
dict_index_get_nth_col(
/*===================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the column number of the nth field in an index.
@return column number */
@@ -986,7 +1106,8 @@ ulint
dict_index_get_nth_col_no(
/*======================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
@@ -996,7 +1117,8 @@ ulint
dict_index_get_nth_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
@@ -1007,8 +1129,9 @@ dict_index_get_nth_col_or_prefix_pos(
/*=================================*/
const dict_index_t* index, /*!< in: index */
ulint n, /*!< in: column number */
- ibool inc_prefix); /*!< in: TRUE=consider
+ ibool inc_prefix) /*!< in: TRUE=consider
column prefixes too */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
@return TRUE if contains the column or its prefix */
@@ -1017,7 +1140,8 @@ ibool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
@@ -1031,7 +1155,8 @@ dict_index_get_nth_field_pos(
/*=========================*/
const dict_index_t* index, /*!< in: index from which to search */
const dict_index_t* index2, /*!< in: index */
- ulint n); /*!< in: field number in index2 */
+ ulint n) /*!< in: field number in index2 */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n position in the clustered index.
@return position in internal representation of the clustered index */
@@ -1040,7 +1165,8 @@ ulint
dict_table_get_nth_col_pos(
/*=======================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns the position of a system column in an index.
@return position, ULINT_UNDEFINED if not contained */
@@ -1049,7 +1175,8 @@ ulint
dict_index_get_sys_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint type); /*!< in: DATA_ROW_ID, ... */
+ ulint type) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Adds a column to index. */
UNIV_INTERN
@@ -1059,7 +1186,8 @@ dict_index_add_col(
dict_index_t* index, /*!< in/out: index */
const dict_table_t* table, /*!< in: table */
dict_col_t* col, /*!< in: column */
- ulint prefix_len); /*!< in: column prefix length */
+ ulint prefix_len) /*!< in: column prefix length */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
@@ -1069,8 +1197,9 @@ dict_index_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
const dict_index_t* index, /*!< in: index */
- ulint n_fields); /*!< in: number of
+ ulint n_fields) /*!< in: number of
field types to copy */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the field column.
@@ -1079,7 +1208,8 @@ UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
- const dict_field_t* field); /*!< in: index field */
+ const dict_field_t* field) /*!< in: index field */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -1089,7 +1219,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -1098,7 +1229,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/**********************************************************************//**
@@ -1110,7 +1242,17 @@ ibool
dict_index_check_search_tuple(
/*==========================*/
const dict_index_t* index, /*!< in: index tree */
- const dtuple_t* tuple); /*!< in: tuple used in a search */
+ const dtuple_t* tuple) /*!< in: tuple used in a search */
+ __attribute__((nonnull, warn_unused_result));
+/** Whether and when to allow temporary index names */
+enum check_name {
+ /** Require all indexes to be complete. */
+ CHECK_ALL_COMPLETE,
+ /** Allow aborted online index creation. */
+ CHECK_ABORTED_OK,
+ /** Allow partial indexes to exist. */
+ CHECK_PARTIAL_OK
+};
/**********************************************************************//**
Check for duplicate index entries in a table [using the index name] */
UNIV_INTERN
@@ -1119,8 +1261,9 @@ dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
- ibool tmp_ok);/*!< in: TRUE=allow temporary
- index names */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
+ __attribute__((nonnull));
#endif /* UNIV_DEBUG */
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.
@@ -1136,8 +1279,9 @@ dict_index_build_node_ptr(
pointer */
mem_heap_t* heap, /*!< in: memory heap where pointer
created */
- ulint level); /*!< in: level of rec in tree:
+ ulint level) /*!< in: level of rec in tree:
0 means leaf level */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely.
@@ -1152,7 +1296,8 @@ dict_index_copy_rec_order_prefix(
ulint* n_fields,/*!< out: number of fields copied */
byte** buf, /*!< in/out: memory buffer for the
copied prefix, or NULL */
- ulint* buf_size);/*!< in/out: buffer size */
+ ulint* buf_size)/*!< in/out: buffer size */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Builds a typed data tuple out of a physical record.
@return own: data tuple */
@@ -1163,7 +1308,8 @@ dict_index_build_data_tuple(
dict_index_t* index, /*!< in: index */
rec_t* rec, /*!< in: record for which to build data tuple */
ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap); /*!< in: memory heap where tuple created */
+ mem_heap_t* heap) /*!< in: memory heap where tuple created */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the space id of the root of the index tree.
@return space id */
@@ -1171,7 +1317,8 @@ UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the space id of the root of the index tree. */
UNIV_INLINE
@@ -1179,7 +1326,8 @@ void
dict_index_set_space(
/*=================*/
dict_index_t* index, /*!< in/out: index */
- ulint space); /*!< in: space id */
+ ulint space) /*!< in: space id */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
@@ -1187,7 +1335,8 @@ UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
- const dict_index_t* tree); /*!< in: index */
+ const dict_index_t* tree) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the read-write lock of the index tree.
@return read-write lock */
@@ -1195,7 +1344,8 @@ UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
@@ -1205,13 +1355,48 @@ UNIV_INLINE
ulint
dict_index_get_space_reserve(void);
/*==============================*/
+
+/* Online index creation @{ */
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
+ __attribute__((nonnull));
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Calculates the minimum record length in an index. */
UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
@@ -1233,8 +1418,9 @@ void
dict_table_stats_lock(
/*==================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/**********************************************************************//**
Unlock the latch that has been locked by dict_table_stats_lock() */
UNIV_INTERN
@@ -1242,8 +1428,9 @@ void
dict_table_stats_unlock(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if the database name in two table names is the same.
@return TRUE if same db name */
@@ -1253,8 +1440,9 @@ dict_tables_have_same_db(
/*=====================*/
const char* name1, /*!< in: table name in the form
dbname '/' tablename */
- const char* name2); /*!< in: table name in the form
+ const char* name2) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Removes an index from the cache */
UNIV_INTERN
@@ -1262,7 +1450,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Get index by name
@return index, NULL if does not exist */
@@ -1271,7 +1460,8 @@ dict_index_t*
dict_table_get_index_on_name(
/*=========================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
In case there is more than one index with the same name return the index
with the min(id).
@@ -1281,7 +1471,8 @@ dict_index_t*
dict_table_get_index_on_name_and_min_id(
/*====================================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
/***************************************************************
Check whether a column exists in an FTS index. */
UNIV_INLINE
@@ -1291,32 +1482,42 @@ dict_table_is_fts_column(
/* out: ULINT_UNDEFINED if no match else
the offset within the vector */
ib_vector_t* indexes,/* in: vector containing only FTS indexes */
- ulint col_no);/* in: col number to search for */
+ ulint col_no) /* in: col number to search for */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Move a table to the non LRU end of the LRU list. */
UNIV_INTERN
void
dict_table_move_from_lru_to_non_lru(
/*================================*/
- dict_table_t* table); /*!< in: table to move from LRU to non-LRU */
+ dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
+ __attribute__((nonnull));
/**********************************************************************//**
Move a table to the LRU list from the non-LRU list. */
UNIV_INTERN
void
dict_table_move_from_non_lru_to_lru(
/*================================*/
- dict_table_t* table); /*!< in: table to move from non-LRU to LRU */
+ dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
+ __attribute__((nonnull));
/**********************************************************************//**
Move to the most recently used segment of the LRU list. */
UNIV_INTERN
void
dict_move_to_mru(
/*=============*/
- dict_table_t* table); /*!< in: table to move to MRU */
+ dict_table_t* table) /*!< in: table to move to MRU */
+ __attribute__((nonnull));
+
+/** Maximum number of columns in a foreign key constraint. Please Note MySQL
+has a much lower limit on the number of columns allowed in a foreign key
+constraint */
+#define MAX_NUM_FK_COLUMNS 500
+
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
extern FILE* dict_foreign_err_file;
-extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
+extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
/** the dictionary system */
extern dict_sys_t* dict_sys;
@@ -1324,8 +1525,8 @@ extern dict_sys_t* dict_sys;
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
-struct dict_sys_struct{
- mutex_t mutex; /*!< mutex protecting the data
+struct dict_sys_t{
+ ib_mutex_t mutex; /*!< mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
@@ -1376,7 +1577,7 @@ dict_ind_init(void);
/* This struct is used to specify the name and type that a column must
have when checking a table's schema. */
-struct dict_col_meta_struct {
+struct dict_col_meta_t {
const char* name; /* column name */
ulint mtype; /* required column main type */
ulint prtype_mask; /* required column precise type mask;
@@ -1385,12 +1586,11 @@ struct dict_col_meta_struct {
in the column's prtype */
ulint len; /* required column length */
};
-typedef struct dict_col_meta_struct dict_col_meta_t;
/* This struct is used for checking whether a given table exists and
whether it has a predefined schema (number of columns and columns names
and types) */
-struct dict_table_schema_struct {
+struct dict_table_schema_t {
const char* table_name; /* the name of the table whose
structure we are checking */
ulint n_cols; /* the number of columns the
@@ -1398,8 +1598,15 @@ struct dict_table_schema_struct {
dict_col_meta_t* columns; /* metadata for the columns;
this array has n_cols
elements */
+ ulint n_foreign; /* number of foreign keys this
+ table has, pointing to other
+ tables (where this table is
+ FK child) */
+ ulint n_referenced; /* number of foreign keys other
+ tables have, pointing to this
+ table (where this table is
+ parent) */
};
-typedef struct dict_table_schema_struct dict_table_schema_t;
/* @} */
/*********************************************************************//**
@@ -1410,7 +1617,7 @@ The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
UNIV_INTERN
-enum db_err
+dberr_t
dict_table_schema_check(
/*====================*/
dict_table_schema_t* req_schema, /*!< in/out: required table
@@ -1419,9 +1626,27 @@ dict_table_schema_check(
message if != DB_SUCCESS and
!= DB_TABLE_NOT_FOUND is
returned */
- size_t errstr_sz); /*!< in: errstr size */
+ size_t errstr_sz) /*!< in: errstr size */
+ __attribute__((nonnull, warn_unused_result));
/* @} */
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+ const char* db_and_table, /*!< in: database and table names,
+ e.g. d@i1b/a@q1b@1Kc */
+ char* db_utf8, /*!< out: database name, e.g. dцb */
+ size_t db_utf8_size, /*!< in: dbname_utf8 size */
+ char* table_utf8, /*!< out: table name, e.g. aюbØc */
+ size_t table_utf8_size)/*!< in: table_utf8 size */
+ __attribute__((nonnull));
+
/**********************************************************************//**
Closes the data dictionary module. */
UNIV_INTERN
@@ -1437,7 +1662,7 @@ ulint
dict_table_is_corrupted(
/*====================*/
const dict_table_t* table) /*!< in: table */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Check whether the index is corrupted.
@@ -1447,7 +1672,7 @@ ulint
dict_index_is_corrupted(
/*====================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -1457,7 +1682,9 @@ UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
- dict_index_t* index) /*!< in/out: index */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context */
UNIV_COLD __attribute__((nonnull));
/**********************************************************************//**
@@ -1469,7 +1696,8 @@ void
dict_set_corrupted_index_cache_only(
/*================================*/
dict_index_t* index, /*!< in/out: index */
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Flags a table with specified space_id corrupted in the table dictionary
@@ -1481,6 +1709,76 @@ dict_set_corrupted_by_space(
/*========================*/
ulint space_id); /*!< in: space ID */
+/********************************************************************//**
+Validate the table flags.
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+ ulint flags) /*!< in: table flags */
+ __attribute__((warn_unused_result));
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Return the optimal page size, for which page will likely compress.
+@return page size beyond which page may not compress*/
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+ dict_index_t* index) /*!< in: index for which page size
+ is requested */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+ ulint table_flag); /*!< in: row format setting */
+
+#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index f6585ea8205..83953c9325a 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -29,6 +29,7 @@ Created 1/8/1996 Heikki Tuuri
#include "rem0types.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
+#include "sync0rw.h" /* RW_S_LATCH */
/*********************************************************************//**
Gets the minimum number of bytes per character.
@@ -223,6 +224,22 @@ dict_table_get_first_index(
}
/********************************************************************//**
+Gets the last index on the table.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table))
+ ->indexes));
+}
+
+/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
UNIV_INLINE
@@ -365,6 +382,56 @@ dict_table_get_n_cols(
return(table->n_cols);
}
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table->stat_initialized);
+
+ return(table->stat_n_rows);
+}
+
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows < 0xFFFFFFFFFFFFFFFFULL) {
+ table->stat_n_rows = n_rows + 1;
+ }
+ }
+}
+
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows > 0) {
+ table->stat_n_rows = n_rows - 1;
+ }
+ }
+}
+
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -458,12 +525,11 @@ dict_table_has_fts_index(
}
/********************************************************************//**
-Validate and return the table flags.
-@return Same as input after validating it as dict_table_t::flags.
-If there is an error, trigger assertion failure. */
+Validate the table flags.
+@return true if valid. */
UNIV_INLINE
-ulint
-dict_tf_validate(
+bool
+dict_tf_is_valid(
/*=============*/
ulint flags) /*!< in: table flags */
{
@@ -473,31 +539,43 @@ dict_tf_validate(
ulint unused = DICT_TF_GET_UNUSED(flags);
/* Make sure there are no bits that we do not know about. */
- ut_a(unused == 0);
+ if (unused != 0) {
- if (atomic_blobs) {
+ return(false);
+
+ } else if (atomic_blobs) {
/* Barracuda row formats COMPRESSED and DYNAMIC build on
the page structure introduced for the COMPACT row format
by allowing keys in secondary indexes to be made from
data stored off-page in the clustered index. */
- ut_a(compact);
- } else {
+
+ if (!compact) {
+ return(false);
+ }
+
+ } else if (zip_ssize) {
+
/* Antelope does not support COMPRESSED row format. */
- ut_a(!zip_ssize);
+ return(false);
}
if (zip_ssize) {
+
/* COMPRESSED row format must have compact and atomic_blobs
- bits set. */
- ut_a(compact);
- ut_a(atomic_blobs);
+ bits set and validate the number is within allowed range. */
- /* Validate the number is within allowed range. */
- ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX);
+ if (!compact
+ || !atomic_blobs
+ || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+
+ return(false);
+ }
}
- /* Return the flags sent if we did not crash. */
- return(flags);
+ /* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
+ so the DATA_DIR flag is compatible with all other table flags. */
+
+ return(true);
}
/********************************************************************//**
@@ -517,9 +595,7 @@ dict_sys_tables_type_validate(
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
ulint unused = DICT_TF_GET_UNUSED(type);
- /* If the format is UNIV_FORMAT_A, table->flags == 0, but
- SYS_TABLES.TYPE == 1, which is defined as SYS_TABLE_TYPE_ANTELOPE.
- The low order bit of SYS_TABLES.TYPE is always set to 1.
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1.
If the format is UNIV_FORMAT_B or higher, this field is the same
as dict_table_t::flags. Zero is not allowed here. */
if (!low_order_bit) {
@@ -527,12 +603,9 @@ dict_sys_tables_type_validate(
}
if (redundant) {
- /* This is Redundant row format, only the first bit
- should be set in SYS_TABLES.TYPE */
- if (type != SYS_TABLE_TYPE_ANTELOPE) {
+ if (zip_ssize || atomic_blobs) {
return(ULINT_UNDEFINED);
}
- return(DICT_TF_REDUNDANT);
}
/* Make sure there are no bits that we do not know about. */
@@ -569,6 +642,11 @@ dict_sys_tables_type_validate(
}
}
+ /* There is nothing to validate for the data_dir field.
+ CREATE TABLE ... DATA DIRECTORY is supported for any row
+ format, so the DATA_DIR flag is compatible with any other
+ table flags. However, it is not used with TEMPORARY tables.*/
+
/* Return the validated SYS_TABLES.TYPE. */
return(type);
}
@@ -584,7 +662,7 @@ dict_tf_get_rec_format(
/*===================*/
ulint flags) /*!< in: dict_table_t::flags */
{
- dict_tf_validate(flags);
+ ut_a(dict_tf_is_valid(flags));
if (!DICT_TF_GET_COMPACT(flags)) {
return(REC_FORMAT_REDUNDANT);
@@ -640,7 +718,8 @@ dict_tf_set(
/*========*/
ulint* flags, /*!< in/out: table flags */
rec_format_t format, /*!< in: file format */
- ulint zip_ssize) /*!< in: zip shift size */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool use_data_dir) /*!< in: table uses DATA DIRECTORY */
{
switch (format) {
case REC_FORMAT_REDUNDANT:
@@ -662,6 +741,10 @@ dict_tf_set(
ut_ad(zip_ssize == 0);
break;
}
+
+ if (use_data_dir) {
+ *flags |= (1 << DICT_TF_POS_DATA_DIR);
+ }
}
/********************************************************************//**
@@ -679,15 +762,61 @@ UNIV_INLINE
ulint
dict_tf_to_fsp_flags(
/*=================*/
- ulint flags) /*!< in: dict_table_t::flags */
+ ulint table_flags) /*!< in: dict_table_t::flags */
{
+ ulint fsp_flags;
+
+ DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
+ return(ULINT_UNDEFINED););
+
/* Adjust bit zero. */
- flags = (flags == DICT_TF_COMPACT) ? 0 : flags;
+ fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
+
+ /* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */
+ fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE;
+ fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS;
/* In addition, tablespace flags also contain the page size. */
- flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
+ fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE);
+
+ /* The DATA_DIR flag is in a different position in fsp_flag */
+ fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
+ ? FSP_FLAGS_MASK_DATA_DIR : 0;
- return(fsp_flags_validate(flags));
+ ut_a(fsp_flags_is_valid(fsp_flags));
+
+ return(fsp_flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags
+The following chart shows the translation of the low order bit.
+Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE | 1 | 1 | 1
+dict_table_t::flags | 0 | 1 | 1
+==================================================================
+@return ulint containing SYS_TABLES.TYPE */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_to_tf(
+/*=======================*/
+ ulint type, /*!< in: SYS_TABLES.TYPE field */
+ ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */
+{
+ ulint flags;
+ ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
+
+ /* Adjust bit zero. */
+ flags = redundant ? 0 : 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ flags |= type & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
+
+ return(flags);
}
/********************************************************************//**
@@ -706,13 +835,19 @@ dict_tf_to_sys_tables_type(
/*=======================*/
ulint flags) /*!< in: dict_table_t::flags */
{
- if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- ut_a(flags == DICT_TF_REDUNDANT
- || flags == DICT_TF_COMPACT);
- return(SYS_TABLE_TYPE_ANTELOPE);
- }
+ ulint type;
+
+ ut_a(dict_tf_is_valid(flags));
+
+ /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
+ type = 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ type |= flags & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
- return(dict_tf_validate(flags));
+ return(type);
}
/********************************************************************//**
@@ -1064,6 +1199,103 @@ dict_index_get_space_reserve(void)
return(UNIV_PAGE_SIZE / 16);
}
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
+{
+ enum online_index_status status;
+
+ status = (enum online_index_status) index->online_status;
+
+ /* Without the index->lock protection, the online
+ status can change from ONLINE_INDEX_CREATION to
+ ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in
+ row_log_apply() once log application is done. So to make
+ sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE
+ you should always do the recheck after acquiring index->lock */
+
+#ifdef UNIV_DEBUG
+ switch (status) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ return(status);
+ }
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(status);
+}
+
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
+{
+ ut_ad(!(index->type & DICT_FTS));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ break;
+ case ONLINE_INDEX_ABORTED:
+ ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED);
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ index->online_status = status;
+ ut_ad(dict_index_get_online_status(index) == status);
+}
+
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+{
+#ifdef UNIV_DEBUG
+ if (dict_index_is_clust(index)) {
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ return(true);
+ case ONLINE_INDEX_COMPLETE:
+ return(false);
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break;
+ }
+ ut_ad(0);
+ return(false);
+ }
+#endif /* UNIV_DEBUG */
+
+ return(UNIV_UNLIKELY(dict_index_get_online_status(index)
+ != ONLINE_INDEX_COMPLETE));
+}
+
/**********************************************************************//**
Check whether a column exists in an FTS index.
@return ULINT_UNDEFINED if no match else the offset within the vector */
@@ -1147,4 +1379,28 @@ dict_index_is_corrupted(
|| (index->table && index->table->corrupted));
}
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
+}
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 13b9a121c1c..5991d58a686 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,38 +29,35 @@ Created 4/24/1996 Heikki Tuuri
#include "univ.i"
#include "dict0types.h"
+#include "trx0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
#include "btr0types.h"
-/** enum that defines all 6 system table IDs */
-enum dict_system_table_id {
+/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
+enum dict_system_id_t {
SYS_TABLES = 0,
SYS_INDEXES,
SYS_COLUMNS,
SYS_FIELDS,
SYS_FOREIGN,
SYS_FOREIGN_COLS,
+ SYS_TABLESPACES,
+ SYS_DATAFILES,
/* This must be last item. Defines the number of system tables. */
SYS_NUM_SYSTEM_TABLES
};
-typedef enum dict_system_table_id dict_system_id_t;
-
/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info {
+enum dict_table_info_t {
DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
structure with information from
a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1, /*!< Check first whether dict_table_t
+ DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
is in the cache, if so, return it */
- DICT_TABLE_UPDATE_STATS = 2 /*!< whether to update statistics
- when loading SYS_TABLES information. */
};
-typedef enum dict_table_info dict_table_info_t;
-
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
@@ -157,6 +154,27 @@ dict_load_field_low(
for temporary storage */
const rec_t* rec); /*!< in: SYS_FIELDS record */
/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and put a null byte before the extension.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ char* filepath); /*!< in: filepath of tablespace */
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool dict_mutex_own); /*!< in: true if dict_sys->mutex
+ is owned already */
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
@@ -199,14 +217,15 @@ cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
ibool check_recursive,/*!< in: Whether to check recursive
load of tables chained by FK */
- ibool check_charsets);/*!< in: TRUE=check charsets
+ ibool check_charsets) /*!< in: TRUE=check charsets
compatibility */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
@@ -324,6 +343,66 @@ dict_process_sys_foreign_col_rec(
const char** ref_col_name, /*!< out: referenced column name
in referenced table */
ulint* pos); /*!< out: column position */
+/********************************************************************//**
+This function parses a SYS_TABLESPACES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
+ ulint* space, /*!< out: pace id */
+ const char** name, /*!< out: tablespace name */
+ ulint* flags); /*!< out: tablespace flags */
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
+ ulint* space, /*!< out: pace id */
+ const char** path); /*!< out: datafile path */
+/********************************************************************//**
+Get the filepath for a spaceid from SYS_DATAFILES. This function provides
+a temporary heap which is used for the table lookup, but not for the path.
+The caller must free the memory for the path returned. This function can
+return NULL if the space ID is not found in SYS_DATAFILES, then the caller
+will assume that the ibd file is in the normal datadir.
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+ ulint space, /*!< in: space id */
+ const char* name); /*!< in: tablespace name */
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+ ulint space_id, /*!< in: space id */
+ const char* filepath); /*!< in: filepath */
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+ ulint space, /*!< in: space id */
+ const char* name, /*!< in: talespace name */
+ const char* filepath, /*!< in: filepath */
+ ulint fsp_flags); /*!< in: tablespace flags */
+
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index ea7e996dfa8..671f67eb1f8 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +31,7 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0types.h"
#include "data0type.h"
#include "mem0mem.h"
+#include "row0types.h"
#include "rem0types.h"
#include "btr0types.h"
#ifndef UNIV_HOTBACKUP
@@ -46,7 +48,7 @@ Created 1/8/1996 Heikki Tuuri
#include "fts0fts.h"
/* Forward declaration. */
-typedef struct ib_rbt_struct ib_rbt_t;
+struct ib_rbt_t;
/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
@@ -93,12 +95,9 @@ and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS
are described in fsp0fsp.h. */
/* @{ */
-/** SYS_TABLES.TYPE can be equal to 1 which means that the Row format
-is one of two Antelope row formats, Redundant or Compact. */
-#define SYS_TABLE_TYPE_ANTELOPE 1
-/** dict_table_t::flags can be equal to 0 if the row format = Redundant */
+/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
-/** dict_table_t::flags can be equal to 1 if the row format = Compact */
+/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
#define DICT_TF_COMPACT 1 /*!< Compact row format. */
/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
@@ -115,10 +114,17 @@ Brracuda row formats store the whole blob or text field off-page atomically.
Secondary indexes are created from this external data using row_ext_t
to cache the BLOB prefixes. */
#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
+/** If a table is created with the MYSQL option DATA DIRECTORY and
+innodb-file-per-table, an older engine will not be able to find that table.
+This flag prevents older engines from attempting to open the table and
+allows InnoDB to update_create_info() accordingly. */
+#define DICT_TF_WIDTH_DATA_DIR 1
+
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ DICT_TF_WIDTH_ZIP_SSIZE \
- + DICT_TF_WIDTH_ATOMIC_BLOBS)
+ + DICT_TF_WIDTH_ATOMIC_BLOBS \
+ + DICT_TF_WIDTH_DATA_DIR)
/** A mask of all the known/used bits in table flags */
#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
@@ -131,9 +137,12 @@ to cache the BLOB prefixes. */
/** Zero relative shift position of the ATOMIC_BLOBS field */
#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \
+ DICT_TF_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_BLOBS \
+/** Zero relative shift position of the DATA_DIR field */
+#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -147,6 +156,10 @@ to cache the BLOB prefixes. */
#define DICT_TF_MASK_ATOMIC_BLOBS \
((~(~0 << DICT_TF_WIDTH_ATOMIC_BLOBS)) \
<< DICT_TF_POS_ATOMIC_BLOBS)
+/** Bit mask of the DATA_DIR field */
+#define DICT_TF_MASK_DATA_DIR \
+ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \
+ << DICT_TF_POS_DATA_DIR)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -160,6 +173,10 @@ to cache the BLOB prefixes. */
#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \
((flags & DICT_TF_MASK_ATOMIC_BLOBS) \
>> DICT_TF_POS_ATOMIC_BLOBS)
+/** Return the value of the ATOMIC_BLOBS field */
+#define DICT_TF_HAS_DATA_DIR(flags) \
+ ((flags & DICT_TF_MASK_DATA_DIR) \
+ >> DICT_TF_POS_DATA_DIR)
/** Return the contents of the UNUSED bits */
#define DICT_TF_GET_UNUSED(flags) \
(flags >> DICT_TF_POS_UNUSED)
@@ -174,7 +191,7 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags
for unknown bits in order to protect backward incompatibility. */
/* @{ */
/** Total number of bits in table->flags2. */
-#define DICT_TF2_BITS 5
+#define DICT_TF2_BITS 6
#define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS)
/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
@@ -189,6 +206,9 @@ This is a transient bit for index build */
/** This bit is used during table creation to indicate that it will
use its own tablespace instead of the system tablespace. */
#define DICT_TF2_USE_TABLESPACE 16
+
+/** Set when we discard/detach the tablespace */
+#define DICT_TF2_DISCARDED 32
/* @} */
#define DICT_TF2_FLAG_SET(table, flag) \
@@ -225,9 +245,7 @@ dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index
- of the table is placed; this parameter
- is ignored if the table is made
- a member of a cluster */
+ of the table is placed */
ulint n_cols, /*!< in: number of columns */
ulint flags, /*!< in: table flags */
ulint flags2); /*!< in: table flags2 */
@@ -249,7 +267,19 @@ dict_mem_table_add_col(
const char* name, /*!< in: column name, or NULL */
ulint mtype, /*!< in: main datatype */
ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision */
+ ulint len) /*!< in: precision */
+ __attribute__((nonnull(1)));
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned nth_col,/*!< in: column index */
+ const char* from, /*!< in: old column name */
+ const char* to) /*!< in: new column name */
+ __attribute__((nonnull));
/**********************************************************************//**
This function populates a dict_col_t memory structure with
supplied information. */
@@ -347,8 +377,19 @@ dict_mem_referenced_table_name_lookup_set(
dict_foreign_t* foreign, /*!< in/out: foreign struct */
ibool do_alloc); /*!< in: is an alloc needed */
+/*******************************************************************//**
+Create a temporary tablename.
+@return temporary tablename suitable for InnoDB use */
+UNIV_INTERN __attribute__((nonnull, warn_unused_result))
+char*
+dict_mem_create_temporary_tablename(
+/*================================*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* dbtab, /*!< in: database/table name */
+ table_id_t id); /*!< in: InnoDB table id */
+
/** Data structure for a column in a table */
-struct dict_col_struct{
+struct dict_col_t{
/*----------------------*/
/** The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
@@ -424,7 +465,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
-struct dict_field_struct{
+struct dict_field_t{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
unsigned prefix_len:12; /*!< 0 or the length of the column
@@ -440,9 +481,61 @@ struct dict_field_struct{
DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
+/**********************************************************************//**
+PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID
+COMPRESSION FAILURES
+(Note: this is relevant only for compressed indexes)
+GOAL: Avoid compression failures by maintaining information about the
+compressibility of data. If data is not very compressible then leave
+some extra space 'padding' in the uncompressed page making it more
+likely that compression of less than fully packed uncompressed page will
+succeed.
+
+This padding heuristic works by increasing the pad linearly until the
+desired failure rate is reached. A "round" is a fixed number of
+compression operations.
+After each round, the compression failure rate for that round is
+computed. If the failure rate is too high, then padding is incremented
+by a fixed value, otherwise it's left intact.
+If the compression failure is lower than the desired rate for a fixed
+number of consecutive rounds, then the padding is decreased by a fixed
+value. This is done to prevent overshooting the padding value,
+and to accommodate the possible change in data compressibility. */
+
+/** Number of zip ops in one round. */
+#define ZIP_PAD_ROUND_LEN (128)
+
+/** Number of successful rounds after which the padding is decreased */
+#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT (5)
+
+/** Amount by which padding is increased. */
+#define ZIP_PAD_INCR (128)
+
+/** Percentage of compression failures that are allowed in a single
+round */
+extern ulong zip_failure_threshold_pct;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+extern ulong zip_pad_max;
+
+/** Data structure to hold information about about how much space in
+an uncompressed page should be left as padding to avoid compression
+failures. This estimate is based on a self-adapting heuristic. */
+struct zip_pad_info_t {
+ os_fast_mutex_t mutex; /*!< mutex protecting the info */
+ ulint pad; /*!< number of bytes used as pad */
+ ulint success;/*!< successful compression ops during
+ current round */
+ ulint failure;/*!< failed compression ops during
+ current round */
+ ulint n_rounds;/*!< number of currently successful
+ rounds */
+};
+
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
-struct dict_index_struct{
+struct dict_index_t{
index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
const char* name; /*!< index name */
@@ -478,24 +571,35 @@ struct dict_index_struct{
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
- /*!< TRUE if this index is marked to be
- dropped in ha_innobase::prepare_drop_index(),
- otherwise FALSE. Protected by
- dict_sys->mutex, dict_operation_lock and
- index->lock.*/
+ /*!< TRUE if the index is to be dropped;
+ protected by dict_operation_lock */
+ unsigned online_status:2;
+ /*!< enum online_index_status.
+ Transitions from ONLINE_INDEX_COMPLETE (to
+ ONLINE_INDEX_CREATION) are protected
+ by dict_operation_lock and
+ dict_sys->mutex. Other changes are
+ protected by index->lock. */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
indexes;/*!< list of indexes of the table */
- btr_search_t* search_info; /*!< info used in optimistic searches */
+ btr_search_t* search_info;
+ /*!< info used in optimistic searches */
+ row_log_t* online_log;
+ /*!< the log of modifications
+ during online index creation;
+ valid when online_status is
+ ONLINE_INDEX_CREATION */
/*----------------------*/
/** Statistics for query optimization */
/* @{ */
ib_uint64_t* stat_n_diff_key_vals;
/*!< approximate number of different
key values for this index, for each
- n-column prefix where n <=
- dict_get_n_unique(index); we
+ n-column prefix where 1 <= n <=
+ dict_get_n_unique(index) (the array is
+ indexed from 0 to n_uniq-1); we
periodically calculate new
estimates */
ib_uint64_t* stat_n_sample_sizes;
@@ -506,7 +610,8 @@ struct dict_index_struct{
ib_uint64_t* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
- n < dict_get_n_unique(index); This
+ 1 <= n <= dict_get_n_unique(index) (the array
+ is indexed from 0 to n_uniq-1); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
@@ -521,9 +626,11 @@ struct dict_index_struct{
trx_id_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
+ zip_pad_info_t zip_pad;/*!< Information about state of
+ compression failures and successes */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
- mutex_t blobs_mutex;
+ ib_mutex_t blobs_mutex;
/*!< mutex protecting blobs */
ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no)
to first_blob_page_no; protected by
@@ -531,15 +638,35 @@ struct dict_index_struct{
#endif /* UNIV_BLOB_DEBUG */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_index_struct::magic_n */
+/** Value of dict_index_t::magic_n */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
+/** The status of online index creation */
+enum online_index_status {
+ /** the index is complete and ready for access */
+ ONLINE_INDEX_COMPLETE = 0,
+ /** the index is being created, online
+ (allowing concurrent modifications) */
+ ONLINE_INDEX_CREATION,
+ /** secondary index creation was aborted and the index
+ should be dropped as soon as index->table->n_ref_count reaches 0,
+ or online table rebuild was aborted and the clustered index
+ of the original table should soon be restored to
+ ONLINE_INDEX_COMPLETE */
+ ONLINE_INDEX_ABORTED,
+ /** the online index creation was aborted, the index was
+ dropped from the data dictionary and the tablespace, and it
+ should be dropped from the data dictionary cache as soon as
+ index->table->n_ref_count reaches 0. */
+ ONLINE_INDEX_ABORTED_DROPPED
+};
+
/** Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
-struct dict_foreign_struct{
+struct dict_foreign_t{
mem_heap_t* heap; /*!< this object is allocated from
this memory heap */
char* id; /*!< id of the constraint as a
@@ -592,7 +719,7 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_struct{
+struct dict_table_t{
table_id_t id; /*!< id of the table */
mem_heap_t* heap; /*!< memory heap */
char* name; /*!< table name */
@@ -602,6 +729,8 @@ struct dict_table_struct{
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
temp\... */
+ char* data_dir_path; /*!< NULL or the directory path
+ specified by DATA DIRECTORY */
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
@@ -612,13 +741,16 @@ struct dict_table_struct{
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
- unsigned tablespace_discarded:1;
- /*!< this flag is set TRUE when the user
- calls DISCARD TABLESPACE on this
- table, and reset to FALSE in IMPORT
- TABLESPACE */
unsigned cached:1;/*!< TRUE if the table object has been added
to the dictionary cache */
+ unsigned to_be_dropped:1;
+ /*!< TRUE if the table is to be dropped, but
+ not yet actually dropped (could in the bk
+ drop list); It is turned on at the beginning
+ of row_drop_table_for_mysql() and turned off
+ just before we start to update system tables
+ for the drop. It is protected by
+ dict_operation_lock */
unsigned n_def:10;/*!< number of columns defined so far */
unsigned n_cols:10;/*!< number of columns */
unsigned can_be_evicted:1;
@@ -626,6 +758,10 @@ struct dict_table_struct{
or a table that has no FK relationships */
unsigned corrupted:1;
/*!< TRUE if table is corrupted */
+ unsigned drop_aborted:1;
+ /*!< TRUE if some indexes should be dropped
+ after ONLINE_INDEX_ABORTED
+ or ONLINE_INDEX_ABORTED_DROPPED */
dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
/*!< Column names packed in a character string
@@ -659,6 +795,12 @@ struct dict_table_struct{
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
+ trx_id_t def_trx_id;
+ /*!< transaction id that last touched
+ the table definition, either when
+ loading the definition or CREATE
+ TABLE, or ALTER TABLE (prepare,
+ commit, and rollback phases) */
trx_id_t query_cache_inv_trx_id;
/*!< transactions whose trx id is
smaller than this number are not
@@ -691,7 +833,55 @@ struct dict_table_struct{
unsigned stat_initialized:1; /*!< TRUE if statistics have
been calculated the first time
after database startup or table creation */
- ib_int64_t stat_n_rows;
+ ib_time_t stats_last_recalc;
+ /*!< Timestamp of last recalc of the stats */
+ ib_uint32_t stat_persistent;
+ /*!< The two bits below are set in the
+ ::stat_persistent member and have the following
+ meaning:
+ 1. _ON=0, _OFF=0, no explicit persistent stats
+ setting for this table, the value of the global
+ srv_stats_persistent is used to determine
+ whether the table has persistent stats enabled
+ or not
+ 2. _ON=0, _OFF=1, persistent stats are
+ explicitly disabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 3. _ON=1, _OFF=0, persistent stats are
+ explicitly enabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_PERSISTENT_ON (1 << 1)
+#define DICT_STATS_PERSISTENT_OFF (1 << 2)
+ ib_uint32_t stats_auto_recalc;
+ /*!< The two bits below are set in the
+ ::stats_auto_recalc member and have
+ the following meaning:
+ 1. _ON=0, _OFF=0, no explicit auto recalc
+ setting for this table, the value of the global
+ srv_stats_persistent_auto_recalc is used to
+ determine whether the table has auto recalc
+ enabled or not
+ 2. _ON=0, _OFF=1, auto recalc is explicitly
+ disabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 3. _ON=1, _OFF=0, auto recalc is explicitly
+ enabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_AUTO_RECALC_ON (1 << 1)
+#define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
+ ulint stats_sample_pages;
+ /*!< the number of pages to sample for this
+ table during persistent stats estimation;
+ if this is 0, then the value of the global
+ srv_stats_persistent_sample_pages will be
+ used instead. */
+ ib_uint64_t stat_n_rows;
/*!< approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
@@ -699,19 +889,34 @@ struct dict_table_struct{
database pages */
ulint stat_sum_of_other_index_sizes;
/*!< other indexes in database pages */
- ulint stat_modified_counter;
+ ib_uint64_t stat_modified_counter;
/*!< when a row is inserted, updated,
or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
- table and the indexes at an interval of 2 GB
- or when about 1 / 16 of table has been
- modified; also when the estimate operation is
+ table and the indexes when about 1 / 16 of
+ table has been modified;
+ also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
+#define BG_STAT_NONE 0
+#define BG_STAT_IN_PROGRESS (1 << 0)
+ /*!< BG_STAT_IN_PROGRESS is set in
+ stats_bg_flag when the background
+ stats code is working on this table. The DROP
+ TABLE code waits for this to be cleared
+ before proceeding. */
+#define BG_STAT_SHOULD_QUIT (1 << 1)
+ /*!< BG_STAT_SHOULD_QUIT is set in
+ stats_bg_flag when DROP TABLE starts
+ waiting on BG_STAT_IN_PROGRESS to be cleared,
+ the background stats thread will detect this
+ and will eventually quit sooner */
+ byte stats_bg_flag;
+ /*!< see BG_STAT_* above */
/* @} */
/*----------------------*/
/**!< The following fields are used by the
@@ -737,7 +942,7 @@ struct dict_table_struct{
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
- mutex_t autoinc_mutex;
+ ib_mutex_t autoinc_mutex;
/*!< mutex protecting the autoincrement
counter */
ib_uint64_t autoinc;/*!< autoinc counter value to give to the
@@ -758,6 +963,14 @@ struct dict_table_struct{
fts_t* fts; /* FTS specific state variables */
/* @} */
/*----------------------*/
+
+ ib_quiesce_t quiesce;/*!< Quiescing states, protected by the
+ dict_index_t::lock. ie. we can only change
+ the state if we acquire all the latches
+ (dict_index_t::lock) in X mode of this table's
+ indexes. */
+
+ /*----------------------*/
ulint n_rec_locks;
/*!< Count of the number of record locks on
this table. We use this to determine whether
@@ -776,7 +989,7 @@ struct dict_table_struct{
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_table_struct::magic_n */
+/** Value of dict_table_t::magic_n */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h
index 879e67a0918..186f90e3694 100644
--- a/storage/innobase/include/dict0stats.h
+++ b/storage/innobase/include/dict0stats.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2009, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,61 +32,128 @@ Created Jan 06, 2010 Vasil Dimov
#include "dict0types.h"
#include "trx0types.h"
-enum dict_stats_upd_option {
+enum dict_stats_upd_option_t {
DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
statistics using a precise and slow
algo and save them to the persistent
storage, if the persistent storage is
not present then emit a warning and
fall back to transient stats */
- DICT_STATS_RECALC_PERSISTENT_SILENT,/* same as
- DICT_STATS_RECALC_PERSISTENT
- but do not emit a warning */
DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics
using an imprecise quick algo
without saving the results
persistently */
- DICT_STATS_FETCH, /* fetch the statistics from the
- persistent storage */
- DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* only fetch the stats
+ DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense)
+ into a table and its indexes' statistics
+ members. The resulting stats correspond to an
+ empty table. If the table is using persistent
+ statistics, then they are saved on disk. */
+ DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats
from the persistent storage if the in-memory
structures have not been initialized yet,
otherwise do nothing */
};
-typedef enum dict_stats_upd_option dict_stats_upd_option_t;
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced. */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off); /*!< in: explicitly disabled */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table); /*!< in: table */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
@return DB_* error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
dict_stats_update(
/*==============*/
dict_table_t* table, /*!< in/out: table */
- dict_stats_upd_option_t stats_upd_option,
+ dict_stats_upd_option_t stats_upd_option);
/*!< in: whether to (re) calc
the stats or to fetch them from
the persistent storage */
- ibool caller_has_dict_sys_mutex);
- /*!< in: TRUE if the caller
- owns dict_sys->mutex */
/*********************************************************************//**
Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
-The transaction is not committed, it must not be committed in this
-function because this is the user trx that is running DROP INDEX.
-The transaction will be committed at the very end when dropping an
-index.
+This function creates its own trx and commits it.
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_index_stats(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx, /*!< in: transaction to use */
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+ const char* tname, /*!< in: table name */
+ const char* iname, /*!< in: index name */
char* errstr, /*!< out: error message if != DB_SUCCESS
is returned */
ulint errstr_sz);/*!< in: size of the errstr buffer */
@@ -97,12 +164,39 @@ persistent storage if it exists and if there is data stored for the table.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_table_stats(
-/*==========================*/
+dberr_t
+dict_stats_drop_table(
+/*==================*/
const char* table_name, /*!< in: table name */
char* errstr, /*!< out: error message
if != DB_SUCCESS is returned */
ulint errstr_sz); /*!< in: size of errstr buffer */
+/*********************************************************************//**
+Fetches or calculates new estimates for index statistics. */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+ dict_index_t* index) /*!< in/out: index */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz); /*!< in: errstr size */
+
+#ifndef UNIV_NONINL
+#include "dict0stats.ic"
+#endif
+
#endif /* dict0stats_h */
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
new file mode 100644
index 00000000000..04763f174d0
--- /dev/null
+++ b/storage/innobase/include/dict0stats.ic
@@ -0,0 +1,250 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.ic
+Code used for calculating and manipulating table statistics.
+
+Created Jan 23, 2012 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "dict0dict.h" /* dict_table_stats_lock() */
+#include "dict0types.h" /* dict_table_t */
+#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart.
+dict_stats_set_persistent() @{ */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+{
+ /* Not allowed to have both flags set, but a CREATE or ALTER
+ statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would
+ end up having both set. In this case we clear the OFF flag. */
+ if (ps_on && ps_off) {
+ ps_off = FALSE;
+ }
+
+ ib_uint32_t stat_persistent = 0;
+
+ if (ps_on) {
+ stat_persistent |= DICT_STATS_PERSISTENT_ON;
+ }
+
+ if (ps_off) {
+ stat_persistent |= DICT_STATS_PERSISTENT_OFF;
+ }
+
+ /* we rely on this assignment to be atomic */
+ table->stat_persistent = stat_persistent;
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+dict_stats_is_persistent_enabled() @{
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* Because of the nature of this check (non-locking) it is possible
+ that a table becomes:
+ * PS-disabled immediately after this function has returned TRUE or
+ * PS-enabled immediately after this function has returned FALSE.
+ This means that it is possible that we do:
+ + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has
+ just been PS-disabled or
+ + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has
+ just been PS-enabled.
+ This is acceptable. Avoiding this would mean that we would have to
+ protect the ::stat_persistent with dict_table_stats_lock() like the
+ other ::stat_ members which would be too big performance penalty,
+ especially when this function is called from
+ row_update_statistics_if_needed(). */
+
+ /* we rely on this read to be atomic */
+ ib_uint32_t stat_persistent = table->stat_persistent;
+
+ if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
+ ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
+ return(TRUE);
+ } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_persistent);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart.
+dict_stats_auto_recalc_set() @{ */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off) /*!< in: explicitly disabled */
+{
+ ut_ad(!auto_recalc_on || !auto_recalc_off);
+
+ ib_uint32_t stats_auto_recalc = 0;
+
+ if (auto_recalc_on) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON;
+ }
+
+ if (auto_recalc_off) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF;
+ }
+
+ /* we rely on this assignment to be atomic */
+ table->stats_auto_recalc = stats_auto_recalc;
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+dict_stats_auto_recalc_is_enabled() @{
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* we rely on this read to be atomic */
+ ib_uint32_t stats_auto_recalc = table->stats_auto_recalc;
+
+ if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
+ ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
+ return(TRUE);
+ } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_auto_recalc);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table.
+dict_stats_init() @{ */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ if (table->stat_initialized) {
+ return;
+ }
+
+ dict_stats_upd_option_t opt;
+
+ if (dict_stats_is_persistent_enabled(table)) {
+ opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
+ } else {
+ opt = DICT_STATS_RECALC_TRANSIENT;
+ }
+
+ dict_stats_update(table, opt);
+}
+/* @} */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open.
+dict_stats_deinit() @{ */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ ut_a(table->n_ref_count == 0);
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ if (!table->stat_initialized) {
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ return;
+ }
+
+ table->stat_initialized = FALSE;
+
+#ifdef UNIV_DEBUG_VALGRIND
+ UNIV_MEM_INVALID(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+ UNIV_MEM_INVALID(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+ UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+ UNIV_MEM_INVALID(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ UNIV_MEM_INVALID(
+ index->stat_n_diff_key_vals,
+ n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_sample_sizes,
+ n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_non_null_key_vals,
+ n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+ UNIV_MEM_INVALID(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+ UNIV_MEM_INVALID(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+}
+/* @} */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
new file mode 100644
index 00000000000..dd85088c7ba
--- /dev/null
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -0,0 +1,116 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.h
+Code used for background table and index stats gathering.
+
+Created Apr 26, 2012 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_bg_h
+#define dict0stats_bg_h
+
+#include "univ.i"
+
+#include "dict0types.h" /* dict_table_t, table_id_t */
+#include "os0sync.h" /* os_event_t */
+#include "os0thread.h" /* DECLARE_THREAD */
+
+/** Event to wake up the stats thread */
+extern os_event_t dict_stats_event;
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped.
+dict_stats_recalc_pool_add() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to add */
+/* @} */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to remove */
+/* @} */
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table(s).
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+tables after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex.
+dict_stats_wait_bg_to_stop_using_tables() @{ */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_tables(
+/*====================================*/
+ dict_table_t* table1, /*!< in/out: table1 */
+ dict_table_t* table2, /*!< in/out: table2, could be NULL */
+ trx_t* trx); /*!< in/out: transaction to use for
+ unlocking/locking the data dict */
+/* @} */
+
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread().
+Must be called before dict_stats_thread() is started.
+dict_stats_thread_init() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_init();
+/*====================*/
+/* @} */
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited.
+dict_stats_thread_deinit() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_deinit();
+/*======================*/
+/* @} */
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, eventually recalculating their
+statistics.
+dict_stats_thread() @{
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+ void* arg); /*!< in: a dummy parameter
+ required by os_thread_create */
+/* @} */
+
+#endif /* dict0stats_bg_h */
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index cd2863582c1..b7f7c2d9df9 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -26,15 +26,15 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0types_h
#define dict0types_h
-typedef struct dict_sys_struct dict_sys_t;
-typedef struct dict_col_struct dict_col_t;
-typedef struct dict_field_struct dict_field_t;
-typedef struct dict_index_struct dict_index_t;
-typedef struct dict_table_struct dict_table_t;
-typedef struct dict_foreign_struct dict_foreign_t;
+struct dict_sys_t;
+struct dict_col_t;
+struct dict_field_t;
+struct dict_index_t;
+struct dict_table_t;
+struct dict_foreign_t;
-typedef struct ind_node_struct ind_node_t;
-typedef struct tab_node_struct tab_node_t;
+struct ind_node_t;
+struct tab_node_t;
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -52,7 +52,7 @@ the table and index will be marked as "corrupted", and caller will
be responsible to deal with corrupted table or index.
Note: please define the IGNORE_ERR_* as bits, so their value can
be or-ed together */
-enum dict_err_ignore {
+enum dict_err_ignore_t {
DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
page is FIL_NULL or incorrect value */
@@ -60,6 +60,11 @@ enum dict_err_ignore {
DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
};
-typedef enum dict_err_ignore dict_err_ignore_t;
+/** Quiescing states for flushing tables to disk. */
+enum ib_quiesce_t {
+ QUIESCE_NONE,
+ QUIESCE_START, /*!< Initialise, prepare to start */
+ QUIESCE_COMPLETE /*!< All done */
+};
#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
index 5e69cb13122..ffb4f270d0e 100644
--- a/storage/innobase/include/dyn0dyn.h
+++ b/storage/innobase/include/dyn0dyn.h
@@ -31,10 +31,9 @@ Created 2/5/1996 Heikki Tuuri
#include "mem0mem.h"
/** A block in a dynamically allocated array */
-typedef struct dyn_block_struct dyn_block_t;
+struct dyn_block_t;
/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
+typedef dyn_block_t dyn_array_t;
/** This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
@@ -159,7 +158,7 @@ dyn_push_string(
/** @brief A block in a dynamically allocated array.
NOTE! Do not access the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
-struct dyn_block_struct{
+struct dyn_block_t{
mem_heap_t* heap; /*!< in the first block this is != NULL
if dynamic allocation has been needed */
ulint used; /*!< number of data bytes used in this block;
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
index b86697d6865..39254e632a8 100644
--- a/storage/innobase/include/dyn0dyn.ic
+++ b/storage/innobase/include/dyn0dyn.ic
@@ -23,9 +23,9 @@ The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
-/** Value of dyn_block_struct::magic_n */
+/** Value of dyn_block_t::magic_n */
#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_struct::used that indicates a full block */
+/** Flag for dyn_block_t::used that indicates a full block */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/************************************************************//**
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 1e2b8049860..56fda8b39b1 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +39,14 @@ Created 10/25/1995 Heikki Tuuri
#include "log0log.h"
#endif /* !UNIV_HOTBACKUP */
+#include <list>
+
+// Forward declaration
+struct trx_t;
+struct fil_space_t;
+
+typedef std::list<const char*> space_name_list_t;
+
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */
@@ -61,12 +69,8 @@ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-/** A struct for storing a space address FIL_ADDR, when it is used
-in C program data structures. */
-
-typedef struct fil_addr_struct fil_addr_t;
/** File space address */
-struct fil_addr_struct{
+struct fil_addr_t{
ulint page; /*!< page number within a space */
ulint boffset; /*!< byte offset within the page */
};
@@ -200,17 +204,19 @@ fil_space_get_type(
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
UNIV_INTERN
-void
+char*
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
ulint id, /*!< in: space id where to append */
- ibool is_raw);/*!< in: TRUE if a raw device or
+ ibool is_raw) /*!< in: TRUE if a raw device or
a raw disk partition */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
@@ -248,6 +254,16 @@ fil_assign_new_space_id(
/*====================*/
ulint* space_id); /*!< in/out: space id */
/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return a copy of fil_node_t::path, NULL if space is zero or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@return space size, 0 if space not found */
@@ -316,6 +332,14 @@ void
fil_close_all_files(void);
/*=====================*/
/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+ bool free); /*!< in: whether to free the memory object */
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
@@ -329,7 +353,7 @@ Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_write_flushed_lsn_to_data_files(
/*================================*/
lsn_t lsn, /*!< in: lsn to write */
@@ -346,6 +370,7 @@ fil_read_first_page(
parameters below already
contain sensible data */
ulint* flags, /*!< out: tablespace flags */
+ ulint* space_id, /*!< out: tablespace ID */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /*!< out: min of archived
log numbers in data files */
@@ -405,25 +430,44 @@ Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
fil_delete_tablespace(
/*==================*/
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove); /*!< in: specify the action to take
+ on the tables pages in the buffer
+ pool */
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+ trx_t* trx, /*!< in/out: Transaction covering the close */
ulint id); /*!< in: space id */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+ in DROP TABLE they are only removed gradually in the background;
+
+ 3. When the user does IMPORT TABLESPACE, the tablespace will have the
+ same id as it originally had.
+
+ 4. Free all the pages in use by the tablespace if rename=TRUE.
+@return DB_SUCCESS or error */
UNIV_INTERN
-ibool
+dberr_t
fil_discard_tablespace(
/*===================*/
- ulint id); /*!< in: space id */
+ ulint id) /*!< in: space id */
+ __attribute__((warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
@@ -433,16 +477,70 @@ UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- const char* old_name_in, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
+ const char* old_name_in, /*!< in: old table name in the
+ standard databasename/tablename
+ format of InnoDB, or NULL if we
+ do the rename based on the space
+ id only */
ulint id, /*!< in: space id */
- const char* new_name); /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
+ const char* new_name, /*!< in: new table name in the
+ standard databasename/tablename
+ format of InnoDB */
+ const char* new_path); /*!< in: new full datafile path
+ if the tablespace is remotely
+ located, or NULL if it is located
+ in the normal data directory. */
/*******************************************************************//**
+Allocates a file name for a single-table tablespace. The string must be freed
+by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_ibd_name(
+/*==============*/
+ const char* name, /*!< in: table name or a dir path */
+ bool is_full_path); /*!< in: TRUE if it is a dir path */
+/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+ const char* name); /*!< in: table name */
+/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+ const char* tablename, /*!< in: tablename */
+ const char* filepath); /*!< in: pathname of tablespace */
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*==================*/
+ const char* tablename); /*!< in: name of table */
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL. The name is of the
+form {databasename}/{tablename} and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+ const char* name); /*!< in: tablespace name */
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
@@ -450,21 +548,20 @@ path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fil_create_new_single_table_tablespace(
/*===================================*/
ulint space_id, /*!< in: space id */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
+ of InnoDB */
+ const char* dir_path, /*!< in: NULL or a dir path */
ulint flags, /*!< in: tablespace flags */
ulint flags2, /*!< in: table flags2 */
- ulint size); /*!< in: the initial size of the
+ ulint size) /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
@@ -475,41 +572,31 @@ NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
+
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file. This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
+
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
+
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ibool
+dberr_t
fil_open_single_table_tablespace(
/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
+ bool validate, /*!< in: Do we validate tablespace? */
+ bool fix_dict, /*!< in: Can we fix the dictionary? */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
- const char* name); /*!< in: table name in the
- databasename/tablename format */
-/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
+ const char* tablename, /*!< in: table name in the
databasename/tablename format */
- lsn_t current_lsn); /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
+ const char* filepath) /*!< in: tablespace filepath */
+ __attribute__((nonnull(5), warn_unused_result));
+
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
@@ -520,7 +607,7 @@ in the doublewrite buffer, also to know where to apply log records where the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_load_single_table_tablespaces(void);
/*===================================*/
/*******************************************************************//**
@@ -562,11 +649,15 @@ fil_space_for_table_exists_in_mem(
data dictionary, so that
we can print a warning about orphaned
tablespaces */
- ibool print_error_if_does_not_exist);
+ ibool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
+ bool adjust_space, /*!< in: whether to adjust space id
+ when find table space mismatch */
+ mem_heap_t* heap, /*!< in: heap memory */
+ table_id_t table_id); /*!< in: table id */
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
@@ -625,7 +716,7 @@ Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INTERN
-ulint
+dberr_t
fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
@@ -651,8 +742,9 @@ fil_io(
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message); /*!< in: message for aio handler if non-sync
+ void* message) /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ __attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
@@ -739,8 +831,154 @@ fil_tablespace_is_being_deleted(
/*============================*/
ulint id); /*!< in: space id */
-typedef struct fil_space_struct fil_space_t;
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+ const char* path); /*!< in: filepath of the ibd tablespace */
+
+/** Callback functor. */
+struct PageCallback {
+
+ /**
+ Default constructor */
+ PageCallback()
+ :
+ m_zip_size(),
+ m_page_size(),
+ m_filepath() UNIV_NOTHROW {}
+
+ virtual ~PageCallback() UNIV_NOTHROW {}
+
+ /**
+ Called for page 0 in the tablespace file at the start.
+ @param file_size - size of the file in bytes
+ @param block - contents of the first page in the tablespace file
+ @retval DB_SUCCESS or error code.*/
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+	@param filename - the physical name of the tablespace file.
+ @param file - OS file handle */
+ void set_file(const char* filename, os_file_t file) UNIV_NOTHROW
+ {
+ m_file = file;
+ m_filepath = filename;
+ }
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_zip_size() const
+ {
+ return(m_zip_size);
+ }
+
+ /**
+ Set the tablespace compressed table size.
+	@return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+ dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
+
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_page_size() const
+ {
+ return(m_page_size);
+ }
+
+ /** Compressed table page size */
+ ulint m_zip_size;
+
+ /** The tablespace page size. */
+ ulint m_page_size;
+
+ /** File handle to the tablespace */
+ os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
+protected:
+ // Disable copying
+ PageCallback(const PageCallback&);
+ PageCallback& operator=(const PageCallback&);
+};
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ PageCallback& callback)
+ __attribute__((nonnull, warn_unused_result));
-#endif /* !UNIV_INNOCHECKSUM */
+/*******************************************************************//**
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache.
+@return space id, ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+ const char* name); /*!< in: table name in the standard
+ 'databasename/tablename' format */
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+ space_name_list_t& space_name_list)
+ /*!< in/out: Vector for collecting the names. */
+ __attribute__((warn_unused_result));
-#endif
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name); /*!< in: temp table name used while
+ swapping */
+
+#endif /* !UNIV_INNOCHECKSUM */
+#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 994783c2db9..a587ccc9f20 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -50,11 +50,15 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
/** Number of flag bits used to indicate the tablespace page size */
#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
+/** Width of the DATA_DIR flag. This flag indicates that the tablespace
+is found in a remote location, not the default data directory. */
+#define FSP_FLAGS_WIDTH_DATA_DIR 1
/** Width of all the currently known tablespace flags */
#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE)
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
/** A mask of all the known/used bits in tablespace flags */
#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
@@ -71,8 +75,11 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
/** Zero relative shift position of the start of the UNUSED bits */
-#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_PAGE_SSIZE \
+#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
/** Bit mask of the POST_ANTELOPE field */
#define FSP_FLAGS_MASK_POST_ANTELOPE \
@@ -90,6 +97,10 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_MASK_PAGE_SSIZE \
((~(~0 << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the DATA_DIR field */
+#define FSP_FLAGS_MASK_DATA_DIR \
+ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \
+ << FSP_FLAGS_POS_DATA_DIR)
/** Return the value of the POST_ANTELOPE field */
#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
@@ -107,6 +118,10 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \
>> FSP_FLAGS_POS_PAGE_SSIZE)
+/** Return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags) \
+ ((flags & FSP_FLAGS_MASK_DATA_DIR) \
+ >> FSP_FLAGS_POS_DATA_DIR)
/** Return the contents of the UNUSED bits */
#define FSP_FLAGS_GET_UNUSED(flags) \
(flags >> FSP_FLAGS_POS_UNUSED)
@@ -555,6 +570,17 @@ fseg_free_page(
ulint page, /*!< in: page offset */
mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page) /*!< in: page offset */
+ __attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
@@ -643,12 +669,13 @@ tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
COMPRESSED and DYNAMIC, use a file format > Antelope so they should
have a file format number plus the DICT_TF_COMPACT bit set.
-@return ulint containing the validated tablespace flags. */
+@return true if check ok */
UNIV_INLINE
-ulint
-fsp_flags_validate(
+bool
+fsp_flags_is_valid(
/*===============*/
- ulint flags); /*!< in: tablespace flags */
+ ulint flags) /*!< in: tablespace flags */
+ __attribute__((warn_unused_result, const));
/********************************************************************//**
Determine if the tablespace is compressed from dict_table_t::flags.
@return TRUE if compressed, FALSE if not compressed */
@@ -658,6 +685,40 @@ fsp_flags_is_compressed(
/*====================*/
ulint flags); /*!< in: tablespace flags */
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset);/*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
#endif /* !UNIV_INNOCHECKSUM */
/********************************************************************//**
@@ -669,7 +730,7 @@ UNIV_INLINE
ulint
fsp_flags_get_zip_size(
/*====================*/
- ulint flags); /*!< in: tablespace flags */
+ ulint flags); /*!< in: tablespace flags */
/********************************************************************//**
Extract the page size from tablespace flags.
@return page size of the tablespace in bytes */
@@ -677,16 +738,7 @@ UNIV_INLINE
ulint
fsp_flags_get_page_size(
/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/********************************************************************//**
-Set page size */
-UNIV_INLINE
-ulint
-fsp_flags_set_page_size(
-/*====================*/
- ulint flags, /*!< in: tablespace flags */
- ulint page_size); /*!< in: page size in bytes */
+ ulint flags); /*!< in: tablespace flags */
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 498f9000888..0d81e817cc9 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,11 +51,10 @@ tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
COMPRESSED and DYNAMIC, use a file format > Antelope so they should
have a file format number plus the DICT_TF_COMPACT bit set.
-@return Same as input after validating it as FSP_SPACE_FLAGS.
-If there is an error, trigger assertion failure. */
+@return true if check ok */
UNIV_INLINE
-ulint
-fsp_flags_validate(
+bool
+fsp_flags_is_valid(
/*===============*/
ulint flags) /*!< in: tablespace flags */
{
@@ -65,16 +64,20 @@ fsp_flags_validate(
ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
ulint unused = FSP_FLAGS_GET_UNUSED(flags);
- /* Make sure there are no bits that we do not know about. */
- ut_a(unused == 0);
+ DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
/* fsp_flags is zero unless atomic_blobs is set. */
- ut_a(flags != 1);
- if (post_antelope) {
+ /* Make sure there are no bits that we do not know about. */
+ if (unused != 0 || flags == 1) {
+ return(false);
+ } else if (post_antelope) {
/* The Antelope row formats REDUNDANT and COMPACT did
not use tablespace flags, so this flag and the entire
4-byte field is zero for Antelope row formats. */
- ut_a(atomic_blobs);
+
+ if (!atomic_blobs) {
+ return(false);
+ }
}
if (!atomic_blobs) {
@@ -82,27 +85,33 @@ fsp_flags_validate(
the page structure introduced for the COMPACT row format
by allowing long fields to be broken into prefix and
externally stored parts. */
- ut_a(!post_antelope);
- ut_a(zip_ssize == 0);
- } else {
- ut_a(post_antelope);
- /* Validate the zip shift size is within allowed range. */
- ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX);
- }
+ if (post_antelope || zip_ssize != 0) {
+ return(false);
+ }
+
+ } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ return(false);
+ } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
+
+ /* The page size field can be used for any row type, or it may
+ be zero for an original 16k page size.
+ Validate the page shift size is within allowed range. */
+
+ return(false);
- /* The page size field can be used for any row type, or it may
- be zero for an original 16k page size.
- Validate the page shift size is within allowed range. */
- ut_a(page_ssize <= UNIV_PAGE_SSIZE_MAX);
- ut_a((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) || (page_ssize));
+ } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+ return(false);
+ }
#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
#endif
- /* Return the flags sent in if we did not fail an assert. */
- return(flags);
+ /* The DATA_DIR field can be used for any row type so there is
+ nothing here to validate. */
+
+ return(true);
}
/********************************************************************//**
@@ -208,9 +217,98 @@ fsp_flags_set_page_size(
flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize);
- ut_ad(flags == fsp_flags_validate(flags));
+ ut_ad(fsp_flags_is_valid(flags));
return(flags);
}
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
+ / FSP_EXTENT_SIZE);
+ } else {
+ return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
+ }
+}
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset) /*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+{
+ ut_ad(offset < FSP_EXTENT_SIZE);
+ ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT);
+
+ ulint index = bit + XDES_BITS_PER_PAGE * offset;
+
+ ulint bit_index = index % 8;
+ ulint byte_index = index / 8;
+
+ return(ut_bit_get_nth(
+ mach_read_ulint(descr + XDES_BITMAP + byte_index,
+ MLOG_1BYTE),
+ bit_index));
+}
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+#ifndef DOXYGEN /* Doxygen gets confused by these */
+# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
+ + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
+ * XDES_SIZE_MAX
+# error
+# endif
+# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
+ * XDES_SIZE_MIN
+# error
+# endif
+#endif /* !DOXYGEN */
+
+ ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+ ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(zip_size > XDES_ARR_OFFSET
+ + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+ return(ut_2pow_round(offset, zip_size));
+ }
+}
+
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
index da40e2bbc96..7f2525dc450 100644
--- a/storage/innobase/include/fts0ast.h
+++ b/storage/innobase/include/fts0ast.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 2007/03/16/03 Sunny Bains
#include "mem0mem.h"
/* The type of AST Node */
-enum fts_ast_type_enum {
+enum fts_ast_type_t {
FTS_AST_OPER, /*!< Operator */
FTS_AST_NUMB, /*!< Number */
FTS_AST_TERM, /*!< Term (or word) */
@@ -39,7 +39,7 @@ enum fts_ast_type_enum {
};
/* The FTS query operators that we support */
-enum fts_ast_oper_enum {
+enum fts_ast_oper_t {
FTS_NONE, /*!< No operator */
FTS_IGNORE, /*!< Ignore rows that contain
@@ -58,20 +58,18 @@ enum fts_ast_oper_enum {
FTS_DECR_RATING, /*!< Decrease the rank for this
word*/
- FTS_DISTANCE /*!< Proximity distance */
+ FTS_DISTANCE, /*!< Proximity distance */
+ FTS_IGNORE_SKIP /*!< Transient node operator
+ signifies that this is a
+ FTS_IGNORE node, and ignored in
+ the first pass of
+ fts_ast_visit() */
};
-/* Enum types used by the FTS parser */
-typedef enum fts_ast_type_enum fts_ast_type_t;
-typedef enum fts_ast_oper_enum fts_ast_oper_t;
-
/* Data types used by the FTS parser */
-typedef struct fts_lexer_struct fts_lexer_t;
-typedef struct fts_ast_text_struct fts_ast_text_t;
-typedef struct fts_ast_term_struct fts_ast_term_t;
-typedef struct fts_ast_node_struct fts_ast_node_t;
-typedef struct fts_ast_list_struct fts_ast_list_t;
-typedef struct fts_ast_state_struct fts_ast_state_t;
+struct fts_lexer_t;
+struct fts_ast_node_t;
+struct fts_ast_state_t;
typedef ulint (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
@@ -180,60 +178,76 @@ fts_ast_state_free(
/*===============*/
fts_ast_state_t*state); /*!< in: state instance
to free */
-/********************************************************************
-Traverse the AST.*/
-ulint
+/******************************************************************//**
+Traverse the AST - in-order traversal.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit(
/*==========*/
fts_ast_oper_t oper, /*!< in: FTS operator */
fts_ast_node_t* node, /*!< in: instance to traverse*/
fts_ast_callback visitor, /*!< in: callback */
- void* arg); /*!< in: callback arg */
-/********************************************************************
-Traverse the sub expression list.*/
-ulint
+ void* arg, /*!< in: callback arg */
+ bool* has_ignore) /*!< out: whether we encounter
+ and ignored processing an
+ operator, currently we only
+ ignore FTS_IGNORE operator */
+ __attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit_sub_exp(
-/*==========*/
+/*==================*/
fts_ast_node_t* node, /*!< in: instance to traverse*/
fts_ast_callback visitor, /*!< in: callback */
- void* arg); /*!< in: callback arg */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************
Create a lex instance.*/
+UNIV_INTERN
fts_lexer_t*
fts_lexer_create(
/*=============*/
ibool boolean_mode, /*!< in: query type */
const byte* query, /*!< in: query string */
- ulint query_len); /*!< in: query string len */
+ ulint query_len) /*!< in: query string len */
+ __attribute__((nonnull, malloc, warn_unused_result));
/********************************************************************
Free an fts_lexer_t instance.*/
+UNIV_INTERN
void
fts_lexer_free(
/*===========*/
- fts_lexer_t* fts_lexer); /*!< in: lexer instance to
+ fts_lexer_t* fts_lexer) /*!< in: lexer instance to
free */
+ __attribute__((nonnull));
/* Query term type */
-struct fts_ast_term_struct {
+struct fts_ast_term_t {
byte* ptr; /*!< Pointer to term string.*/
ibool wildcard; /*!< TRUE if wild card set.*/
};
/* Query text type */
-struct fts_ast_text_struct {
+struct fts_ast_text_t {
byte* ptr; /*!< Pointer to term string.*/
ulint distance; /*!< > 0 if proximity distance
set */
};
/* The list of nodes in an expr list */
-struct fts_ast_list_struct {
+struct fts_ast_list_t {
fts_ast_node_t* head; /*!< Children list head */
fts_ast_node_t* tail; /*!< Children list tail */
};
/* FTS AST node to store the term, text, operator and sub-expressions.*/
-struct fts_ast_node_struct {
+struct fts_ast_node_t {
fts_ast_type_t type; /*!< The type of node */
fts_ast_text_t text; /*!< Text node */
fts_ast_term_t term; /*!< Term node */
@@ -241,10 +255,12 @@ struct fts_ast_node_struct {
fts_ast_list_t list; /*!< Expression list */
fts_ast_node_t* next; /*!< Link for expr list */
fts_ast_node_t* next_alloc; /*!< For tracking allocations */
+ bool visited; /*!< whether this node is
+ already processed */
};
/* To track state during parsing */
-struct fts_ast_state_struct {
+struct fts_ast_state_t {
mem_heap_t* heap; /*!< Heap to use for alloc */
fts_ast_node_t* root; /*!< If all goes OK, then this
will point to the root.*/
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index e515772bdbd..f2f8617012a 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,16 +114,16 @@ to mark invalid states.
NOTE: Do not change the order or value of these, fts_trx_row_get_new_state
depends on them being exactly as they are. */
-typedef enum {
+enum fts_row_state {
FTS_INSERT = 0,
FTS_MODIFY,
FTS_DELETE,
FTS_NOTHING,
FTS_INVALID
-} fts_row_state;
+};
/** The FTS table types. */
-enum fts_table_type_enum {
+enum fts_table_type_t {
FTS_INDEX_TABLE, /*!< FTS auxiliary table that is
specific to a particular FTS index
on a table */
@@ -132,21 +132,11 @@ enum fts_table_type_enum {
for all FTS index on a table */
};
-typedef struct fts_struct fts_t;
-typedef struct fts_doc_struct fts_doc_t;
-typedef struct fts_trx_struct fts_trx_t;
-typedef struct fts_table_struct fts_table_t;
-typedef struct fts_cache_struct fts_cache_t;
-typedef struct fts_token_struct fts_token_t;
-typedef struct fts_string_struct fts_string_t;
-typedef struct fts_result_struct fts_result_t;
-typedef struct fts_ranking_struct fts_ranking_t;
-typedef struct fts_trx_row_struct fts_trx_row_t;
-typedef struct fts_doc_ids_struct fts_doc_ids_t;
-typedef enum fts_table_type_enum fts_table_type_t;
-typedef struct fts_trx_table_struct fts_trx_table_t;
-typedef struct fts_savepoint_struct fts_savepoint_t;
-typedef struct fts_index_cache_struct fts_index_cache_t;
+struct fts_doc_t;
+struct fts_cache_t;
+struct fts_token_t;
+struct fts_doc_ids_t;
+struct fts_index_cache_t;
/** Initialize the "fts_table" for internal query into FTS auxiliary
@@ -172,7 +162,7 @@ do { \
/** Information about changes in a single transaction affecting
the FTS system. */
-struct fts_trx_struct {
+struct fts_trx_t {
trx_t* trx; /*!< InnoDB transaction */
ib_vector_t* savepoints; /*!< Active savepoints, must have at
@@ -184,7 +174,7 @@ struct fts_trx_struct {
};
/** Information required for transaction savepoint handling. */
-struct fts_savepoint_struct {
+struct fts_savepoint_t {
char* name; /*!< First entry is always NULL, the
default instance. Otherwise the name
of the savepoint */
@@ -193,7 +183,7 @@ struct fts_savepoint_struct {
};
/** Information about changed rows in a transaction for a single table. */
-struct fts_trx_table_struct {
+struct fts_trx_table_t {
dict_table_t* table; /*!< table */
fts_trx_t* fts_trx; /*!< link to parent */
@@ -209,7 +199,7 @@ struct fts_trx_table_struct {
};
/** Information about one changed row in a transaction. */
-struct fts_trx_row_struct {
+struct fts_trx_row_t {
doc_id_t doc_id; /*!< Id of the ins/upd/del document */
fts_row_state state; /*!< state of the row */
@@ -220,7 +210,7 @@ struct fts_trx_row_struct {
/** List of document ids that were added during a transaction. This
list is passed on to a background 'Add' thread and OPTIMIZE, so it
needs its own memory heap. */
-struct fts_doc_ids_struct {
+struct fts_doc_ids_t {
ib_vector_t* doc_ids; /*!< document ids (each element is
of type doc_id_t). */
@@ -237,7 +227,7 @@ as our in-memory format. This typedef is a single such character. */
typedef unsigned short ib_uc_t;
/** An UTF-16 ro UTF-8 string. */
-struct fts_string_struct {
+struct fts_string_t {
byte* f_str; /*!< string, not necessary terminated in
any way */
ulint f_len; /*!< Length of the string in bytes */
@@ -245,7 +235,7 @@ struct fts_string_struct {
};
/** Query ranked doc ids. */
-struct fts_ranking_struct {
+struct fts_ranking_t {
doc_id_t doc_id; /*!< Document id */
fts_rank_t rank; /*!< Rank is between 0 .. 1 */
@@ -256,7 +246,7 @@ struct fts_ranking_struct {
};
/** Query result. */
-struct fts_result_struct {
+struct fts_result_t {
ib_rbt_node_t* current; /*!< Current element */
ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t
@@ -268,7 +258,7 @@ struct fts_result_struct {
/** This is used to generate the FTS auxiliary table name, we need the
table id and the index id to generate the column specific FTS auxiliary
table name. */
-struct fts_table_struct {
+struct fts_table_t {
const char* parent; /*!< Parent table name, this is
required only for the database
name */
@@ -311,10 +301,10 @@ enum fts_status {
typedef enum fts_status fts_status_t;
/** The state of the FTS sub system. */
-struct fts_struct {
+struct fts_t {
/*!< mutex protecting bg_threads* and
fts_add_wq. */
- mutex_t bg_threads_mutex;
+ ib_mutex_t bg_threads_mutex;
ulint bg_threads; /*!< number of background threads
accessing this table */
@@ -339,10 +329,10 @@ struct fts_struct {
ib_vector_t* indexes; /*!< Vector of FTS indexes, this is
mainly for caching purposes. */
- mem_heap_t* fts_heap; /*!< heap for fts_struct allocation */
+ mem_heap_t* fts_heap; /*!< heap for fts_t allocation */
};
-typedef struct fts_stopword_struct fts_stopword_t;
+struct fts_stopword_t;
/** status bits for fts_stopword_t status field. */
#define STOPWORD_NOT_INIT 0x1
@@ -395,15 +385,15 @@ fts_cache_index_cache_create(
/******************************************************************//**
Get the next available document id. This function creates a new
-transaction to generate the document id. */
+transaction to generate the document id.
+@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_get_next_doc_id(
/*================*/
- /*!< out: DB_SUCCESS if OK */
- const dict_table_t* table, /*!< in: table */
- doc_id_t* doc_id); /*!< out: new document id */
-
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t* doc_id) /*!< out: new document id */
+ __attribute__((nonnull));
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
"doc_id" value (+ 1). We would do so after each FTS index build or
@@ -412,28 +402,17 @@ UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
- doc_id_t doc_id); /*!< in: DOC ID to set */
-
-/******************************************************************//**
-Update the last document id. This function could create a new
-transaction to update the last document id. */
-UNIV_INTERN
-ulint
-fts_update_sync_doc_id(
-/*===================*/
- /*!< out: DB_SUCCESS if OK */
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
- doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx); /*!< in: update trx */
+ const char* table_name, /*!< in: table name, or NULL */
+ doc_id_t doc_id) /*!< in: DOC ID to set */
+ __attribute__((nonnull(2)));
/******************************************************************//**
Create a new document id .
@return DB_SUCCESS if all went well else error */
UNIV_INTERN
-ulint
+dberr_t
fts_create_doc_id(
/*==============*/
dict_table_t* table, /*!< in: row is of this
@@ -442,8 +421,8 @@ fts_create_doc_id(
value to this row. This is the
current row that is being
inserted. */
- mem_heap_t* heap); /*!< in: heap */
-
+ mem_heap_t* heap) /*!< in: heap */
+ __attribute__((nonnull));
/******************************************************************//**
Create a new fts_doc_ids_t.
@return new fts_doc_ids_t. */
@@ -488,7 +467,7 @@ on the given table. row_mysql_lock_data_dictionary must have been
called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_common_tables(
/*=====================*/
trx_t* trx, /*!< in: transaction handle */
@@ -496,27 +475,27 @@ fts_create_common_tables(
table, /*!< in: table with one FTS
index */
const char* name, /*!< in: table name */
- ibool skip_doc_id_index);
- /*!< in: Skip index on doc id */
+ bool skip_doc_id_index) /*!< in: Skip index on doc id */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Wrapper function of fts_create_index_tables_low(), create auxiliary
tables for an FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
- const dict_index_t* index); /*!< in: the FTS index
+ const dict_index_t* index) /*!< in: the FTS index
instance */
-
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables_low(
/*========================*/
trx_t* trx, /*!< in: transaction handle */
@@ -524,16 +503,17 @@ fts_create_index_tables_low(
index, /*!< in: the FTS index
instance */
const char* table_name, /*!< in: the table name */
- table_id_t table_id); /*!< in: the table id */
-
+ table_id_t table_id) /*!< in: the table id */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Add the FTS document id hidden column. */
UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
- dict_table_t* table); /*!< in/out: Table with
- FTS index */
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Drops the ancillary tables needed for supporting an FTS index on the
@@ -541,28 +521,29 @@ given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_tables(
/*============*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table has the FTS
+ dict_table_t* table) /*!< in: table has the FTS
index */
-
+ __attribute__((nonnull));
/******************************************************************//**
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_commit(
/*=======*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
FTS Query entry point.
@return DB_SUCCESS if successful otherwise error code */
UNIV_INTERN
-ulint
+dberr_t
fts_query(
/*======*/
trx_t* trx, /*!< in: transaction */
@@ -571,8 +552,9 @@ fts_query(
const byte* query, /*!< in: FTS query */
ulint query_len, /*!< in: FTS query string len
in bytes */
- fts_result_t** result); /*!< out: query result, to be
+ fts_result_t** result) /*!< out: query result, to be
freed by the caller.*/
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
@@ -686,10 +668,11 @@ fts_free(
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_optimize_table(
/*===============*/
- dict_table_t* table); /*!< in: table to optimiza */
+ dict_table_t* table) /*!< in: table to optimiza */
+ __attribute__((nonnull));
/**********************************************************************//**
Startup the optimize thread and create the work queue. */
@@ -710,11 +693,12 @@ fts_optimize_is_init(void);
Drops index ancillary tables for a FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
- dict_index_t* index); /*!< in: Index to drop */
+ dict_index_t* index) /*!< in: Index to drop */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
@@ -740,24 +724,22 @@ fts_optimize_end(void);
/*===================*/
/**********************************************************************//**
-Take a FTS savepoint.
-@return DB_SUCCESS or error code */
+Take a FTS savepoint. */
UNIV_INTERN
void
fts_savepoint_take(
/*===============*/
trx_t* trx, /*!< in: transaction */
- const char* name); /*!< in: savepoint name */
-
+ const char* name) /*!< in: savepoint name */
+ __attribute__((nonnull));
/**********************************************************************//**
-Refresh last statement savepoint.
-@return DB_SUCCESS or error code */
+Refresh last statement savepoint. */
UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
/*===========================*/
- trx_t* trx); /*!< in: transaction */
-
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Release the savepoint data identified by name. */
UNIV_INTERN
@@ -821,26 +803,26 @@ fts_drop_orphaned_tables(void);
/*==========================*/
/******************************************************************//**
-Since we do a horizontal split on the index table, we need to drop the
-all the split tables. */
+Since we do a horizontal split on the index table, we need to drop
+all the split tables.
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_split_tables(
/*========================*/
- /*!< out: DB_SUCCESS
- or error code */
trx_t* trx, /*!< in: transaction */
- dict_index_t* index); /*!< in: fts instance */
+ dict_index_t* index) /*!< in: fts instance */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+FTS auxiliary INDEX table and clear the cache at the end. */
UNIV_INTERN
-ulint
+void
fts_sync_table(
/*===========*/
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
@@ -978,9 +960,9 @@ fts_get_docs_create(
/****************************************************************//**
Read the rows from the FTS index
-@return vector of rows fetched */
+@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_table_fetch_doc_ids(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -1011,12 +993,13 @@ fts_add_index(
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index(
/*===========*/
dict_table_t* table, /*!< in: Table where indexes are dropped */
dict_index_t* index, /*!< in: Index to be dropped */
- trx_t* trx); /*!< in: Transaction for the drop */
+ trx_t* trx) /*!< in: Transaction for the drop */
+ __attribute__((nonnull));
/*******************************************************************//**
Check indexes in the fts->indexes is also present in index cache and
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index 8524f988e47..c6aca27f6ec 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -113,23 +113,25 @@ component.
/******************************************************************//**
Parse an SQL string. %s is replaced with the table's id.
-@return DB_SUCCESS or error code */
+@return query graph */
UNIV_INTERN
que_t*
fts_parse_sql(
/*==========*/
fts_table_t* fts_table, /*!< in: FTS aux table */
pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql); /*!< in: SQL string to evaluate */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
/******************************************************************//**
Evaluate a parsed SQL statement
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_eval_sql(
/*=========*/
trx_t* trx, /*!< in: transaction */
- que_t* graph); /*!< in: Parsed statement */
+ que_t* graph) /*!< in: Parsed statement */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Construct the name of an ancillary FTS table for the given table.
@return own: table name, must be freed with mem_free() */
@@ -138,7 +140,8 @@ char*
fts_get_table_name(
/*===============*/
const fts_table_t*
- fts_table); /*!< in: FTS aux table info */
+ fts_table) /*!< in: FTS aux table info */
+ __attribute__((nonnull, malloc, warn_unused_result));
/******************************************************************//**
Construct the column specification part of the SQL string for selecting the
indexed FTS columns for the given table. Adds the necessary bound
@@ -160,7 +163,8 @@ fts_get_select_columns_str(
/*=======================*/
dict_index_t* index, /*!< in: FTS index */
pars_info_t* info, /*!< in/out: parser info */
- mem_heap_t* heap); /*!< in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull, warn_unused_result));
/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
we want to get Doc whose ID is equal to or greater or smaller than supplied
@@ -174,41 +178,45 @@ Fetch document (= a single row's indexed text) with the given
document id.
@return: DB_SUCCESS if fetch is successful, else error */
UNIV_INTERN
-ulint
+dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
fts_get_doc_t* get_doc, /*!< in: state */
doc_id_t doc_id, /*!< in: id of document to fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
ulint option, /*!< in: search option, if it is
greater than doc_id or equal */
fts_sql_callback
callback, /*!< in: callback to read
records */
- void* arg); /*!< in: callback arg */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull(6)));
/*******************************************************************//**
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
-@return: always returns NULL */
+@return always FALSE */
UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
/*==========================*/
void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: fts_doc_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+ __attribute__((nonnull));
/********************************************************************
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
UNIV_INTERN
-ulint
+dberr_t
fts_write_node(
/*===========*/
trx_t* trx, /*!< in: transaction */
que_t** graph, /*!< in: query graph */
fts_table_t* fts_table, /*!< in: the FTS aux index */
fts_string_t* word, /*!< in: word in UTF-8 */
- fts_node_t* node); /*!< in: node columns */
+ fts_node_t* node) /*!< in: node columns */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Tokenize a document. */
UNIV_INTERN
@@ -217,8 +225,10 @@ fts_tokenize_document(
/*==================*/
fts_doc_t* doc, /*!< in/out: document to
tokenize */
- fts_doc_t* result); /*!< out: if provided, save
+ fts_doc_t* result) /*!< out: if provided, save
result tokens here */
+ __attribute__((nonnull(1)));
+
/*******************************************************************//**
Continue to tokenize a document. */
UNIV_INTERN
@@ -229,16 +239,18 @@ fts_tokenize_document_next(
tokenize */
ulint add_pos, /*!< in: add this position to all
tokens from this tokenization */
- fts_doc_t* result); /*!< out: if provided, save
+ fts_doc_t* result) /*!< out: if provided, save
result tokens here */
+ __attribute__((nonnull(1)));
/******************************************************************//**
-Create a new empty document.
-@return own: new document */
+Initialize a document. */
UNIV_INTERN
-fts_doc_t*
+void
fts_doc_init(
/*=========*/
- fts_doc_t* doc); /*!< in: doc to initialize */
+ fts_doc_t* doc) /*!< in: doc to initialize */
+ __attribute__((nonnull));
+
/******************************************************************//**
Do a binary search for a doc id in the array
@return +ve index if found -ve index where it should be
@@ -250,26 +262,29 @@ fts_bsearch(
fts_update_t* array, /*!< in: array to sort */
int lower, /*!< in: lower bound of array*/
int upper, /*!< in: upper bound of array*/
- doc_id_t doc_id); /*!< in: doc id to lookup */
+ doc_id_t doc_id) /*!< in: doc id to lookup */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Free document. */
UNIV_INTERN
void
fts_doc_free(
/*=========*/
- fts_doc_t* doc); /*!< in: document */
+ fts_doc_t* doc) /*!< in: document */
+ __attribute__((nonnull));
/******************************************************************//**
Free fts_optimizer_word_t instanace.*/
-
+UNIV_INTERN
void
fts_word_free(
/*==========*/
- fts_word_t* word); /*!< in: instance to free.*/
+ fts_word_t* word) /*!< in: instance to free.*/
+ __attribute__((nonnull));
/******************************************************************//**
Read the rows from the FTS inde
-@return vector of rows fetched */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_index_fetch_nodes(
/*==================*/
trx_t* trx, /*!< in: transaction */
@@ -277,7 +292,8 @@ fts_index_fetch_nodes(
fts_table_t* fts_table, /*!< in: FTS aux table */
const fts_string_t*
word, /*!< in: the word to fetch */
- fts_fetch_t* fetch); /*!< in: fetch callback.*/
+ fts_fetch_t* fetch) /*!< in: fetch callback.*/
+ __attribute__((nonnull));
/******************************************************************//**
Create a fts_optimizer_word_t instance.
@return new instance */
@@ -287,7 +303,8 @@ fts_word_init(
/*==========*/
fts_word_t* word, /*!< in: word to initialize */
byte* utf8, /*!< in: UTF-8 string */
- ulint len); /*!< in: length of string in bytes */
+ ulint len) /*!< in: length of string in bytes */
+ __attribute__((nonnull));
/******************************************************************//**
Compare two fts_trx_table_t instances, we actually compare the
table id's here.
@@ -297,7 +314,8 @@ int
fts_trx_table_cmp(
/*==============*/
const void* v1, /*!< in: id1 */
- const void* v2); /*!< in: id2 */
+ const void* v2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Compare a table id with a trx_table_t table id.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
@@ -306,23 +324,26 @@ int
fts_trx_table_id_cmp(
/*=================*/
const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
+ const void* p2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Commit a transaction.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_commit(
/*===========*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/******************************************************************//**
Rollback a transaction.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_rollback(
/*=============*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/******************************************************************//**
Parse an SQL string. %s is replaced with the table's id. Don't acquire
the dict mutex
@@ -333,41 +354,44 @@ fts_parse_sql_no_dict_lock(
/*=======================*/
fts_table_t* fts_table, /*!< in: table with FTS index */
pars_info_t* info, /*!< in: parser info */
- const char* sql); /*!< in: SQL string to evaluate */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
/******************************************************************//**
Get value from config table. The caller must ensure that enough
space is allocated for value to hold the column contents
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_value(
/*=================*/
trx_t* trx, /* transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull));
/******************************************************************//**
Get value specific to an FTS index from the config table. The caller
must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: index */
const char* param, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set the value in the config table for name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -375,89 +399,96 @@ fts_config_set_value(
const char* name, /*!< in: get config value for
this parameter name */
const fts_string_t*
- value); /*!< in: value to update */
+ value) /*!< in: value to update */
+ __attribute__((nonnull));
/****************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: param name */
- ulint int_value); /*!< in: value */
-
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: index */
const char* param, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Increment the value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_value(
/*=======================*/
trx_t* trx, /*!< transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: increment config value
for this parameter name */
- ulint delta); /*!< in: increment by this much */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Increment the per index value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_index_value(
/*=============================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: increment config value
for this parameter name */
- ulint delta); /*!< in: increment by this much */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull));
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: param name */
- ulint* int_value); /*!< out: value */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set an ulint value int the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: param name */
- ulint int_value); /*!< in: value */
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: param name */
- ulint* int_value); /*!< out: value */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull));
/******************************************************************//**
Search cache for word.
@return the word node vector if found else NULL */
@@ -468,7 +499,8 @@ fts_cache_find_word(
const fts_index_cache_t*
index_cache, /*!< in: cache to search */
const fts_string_t*
- text); /*!< in: word to search for */
+ text) /*!< in: word to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Check cache for deleted doc id.
@return TRUE if deleted */
@@ -478,7 +510,8 @@ fts_cache_is_deleted_doc_id(
/*========================*/
const fts_cache_t*
cache, /*!< in: cache ito search */
- doc_id_t doc_id); /*!< in: doc id to search for */
+ doc_id_t doc_id) /*!< in: doc id to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Append deleted doc ids to vector and sort the vector. */
UNIV_INTERN
@@ -502,35 +535,31 @@ fts_wait_for_background_thread_to_start(
ulint max_wait); /*!< in: time in microseconds, if set
to 0 then it disables timeout
checking */
-/*********************************************************************//**
-Get the total number of documents in the FTS.
-@return estimated number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_total_document_count(
-/*=========================*/
- dict_table_t* table); /*!< in: table instance */
+#ifdef FTS_DOC_STATS_DEBUG
/******************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_get_total_word_count(
/*=====================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: for this index */
- ulint* total); /*!< out: total words */
+ ulint* total) /*!< out: total words */
+ __attribute__((nonnull, warn_unused_result));
+#endif
/******************************************************************//**
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
UNIV_INTERN
-const fts_index_cache_t*
+fts_index_cache_t*
fts_find_index_cache(
/*================*/
const fts_cache_t*
cache, /*!< in: cache to search */
const dict_index_t*
- index); /*!< in: index to search for */
+ index) /*!< in: index to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Write the table id to the given buffer (including final NUL). Buffer must be
at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
@@ -539,8 +568,9 @@ UNIV_INLINE
int
fts_write_object_id(
/*================*/
- ib_id_t id, /*!< in: a table/index id */
- char* str); /*!< in: buffer to write the id to */
+ ib_id_t id, /*!< in: a table/index id */
+ char* str) /*!< in: buffer to write the id to */
+ __attribute__((nonnull));
/******************************************************************//**
Read the table id from the string generated by fts_write_object_id().
@return TRUE if parse successful */
@@ -549,7 +579,8 @@ ibool
fts_read_object_id(
/*===============*/
ib_id_t* id, /*!< out: a table id */
- const char* str); /*!< in: buffer to read from */
+ const char* str) /*!< in: buffer to read from */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Get the table id.
@return number of bytes written */
@@ -559,23 +590,26 @@ fts_get_table_id(
/*=============*/
const fts_table_t*
fts_table, /*!< in: FTS Auxiliary table */
- char* table_id); /*!< out: table id, must be at least
+ char* table_id) /*!< out: table id, must be at least
FTS_AUX_MIN_TABLE_ID_LENGTH bytes
long */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Add the table to add to the OPTIMIZER's list. */
UNIV_INTERN
void
fts_optimize_add_table(
/*===================*/
- dict_table_t* table); /*!< in: table to add */
+ dict_table_t* table) /*!< in: table to add */
+ __attribute__((nonnull));
/******************************************************************//**
Optimize a table. */
UNIV_INTERN
void
fts_optimize_do_table(
/*==================*/
- dict_table_t* table); /*!< in: table to optimize */
+ dict_table_t* table) /*!< in: table to optimize */
+ __attribute__((nonnull));
/******************************************************************//**
Construct the prefix name of an FTS table.
@return own: table name, must be freed with mem_free() */
@@ -584,7 +618,8 @@ char*
fts_get_table_name_prefix(
/*======================*/
const fts_table_t*
- fts_table); /*!< in: Auxiliary table type */
+ fts_table) /*!< in: Auxiliary table type */
+ __attribute__((nonnull, malloc, warn_unused_result));
/******************************************************************//**
Add node positions. */
UNIV_INTERN
@@ -594,7 +629,8 @@ fts_cache_node_add_positions(
fts_cache_t* cache, /*!< in: cache */
fts_node_t* node, /*!< in: word node */
doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* positions); /*!< in: fts_token_t::positions */
+ ib_vector_t* positions) /*!< in: fts_token_t::positions */
+ __attribute__((nonnull(2,4)));
/******************************************************************//**
Create the config table name for retrieving index specific value.
@@ -604,7 +640,8 @@ char*
fts_config_create_index_param_name(
/*===============================*/
const char* param, /*!< in: base name of param */
- const dict_index_t* index); /*!< in: index for config */
+ const dict_index_t* index) /*!< in: index for config */
+ __attribute__((nonnull, malloc, warn_unused_result));
#ifndef UNIV_NONINL
#include "fts0priv.ic"
diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic
index 716ea4713b5..268bb7e2227 100644
--- a/storage/innobase/include/fts0priv.ic
+++ b/storage/innobase/include/fts0priv.ic
@@ -31,15 +31,9 @@ UNIV_INLINE
int
fts_write_object_id(
/*================*/
- ib_id_t id, /* in: a table/index id */
+ ib_id_t id, /* in: a table/index id */
char* str) /* in: buffer to write the id to */
{
-#ifdef __WIN__
-# define UINT64PFx "%016I64u"
-#else
-# define UINT64PFx "%016"PRIx64
-# endif /* __WIN__ */
-
// FIXME: Use ut_snprintf()
return(sprintf(str, UINT64PFx, id));
}
@@ -54,6 +48,45 @@ fts_read_object_id(
ib_id_t* id, /* out: an id */
const char* str) /* in: buffer to read from */
{
- return(sscanf(str, IB_ID_FMT, id) == 2);
+ return(sscanf(str, UINT64PFx, id) == 1);
+}
+
+/******************************************************************//**
+Compare two fts_trx_table_t instances.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+{
+ const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((table1->id > table2->id)
+ ? 1
+ : (table1->id == table2->id)
+ ? 0
+ : -1);
}
+/******************************************************************//**
+Compare a table id with a fts_trx_table_t table id.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+{
+ const ullint* table_id = (const ullint*) p1;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((*table_id > table2->id)
+ ? 1
+ : (*table_id == table2->id)
+ ? 0
+ : -1);
+}
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index 5b28f2c9473..8fc52c9fc5e 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -32,41 +32,35 @@ Created 2007-03-27 Sunny Bains
#include "ut0rbt.h"
#include "fts0fts.h"
-/** Types (aliases) used within FTS. */
-typedef struct fts_que_struct fts_que_t;
-typedef struct fts_node_struct fts_node_t;
-typedef struct fts_word_struct fts_word_t;
-typedef struct fts_fetch_struct fts_fetch_t;
-typedef struct fts_update_struct fts_update_t;
-typedef struct fts_get_doc_struct fts_get_doc_t;
-typedef struct fts_utf8_str_struct fts_utf8_str_t;
-typedef struct fts_doc_stats_struct fts_doc_stats_t;
-typedef struct fts_tokenizer_word_struct fts_tokenizer_word_t;
-typedef struct fts_index_selector_struct fts_index_selector_t;
+/** Types used within FTS. */
+struct fts_que_t;
+struct fts_node_t;
+struct fts_utf8_str_t;
/** Callbacks used within FTS. */
typedef pars_user_func_cb_t fts_sql_callback;
typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len);
/** Statistics relevant to a particular document, used during retrieval. */
-struct fts_doc_stats_struct {
+struct fts_doc_stats_t {
doc_id_t doc_id; /*!< Document id */
ulint word_count; /*!< Total words in the document */
};
/** It's main purpose is to store the SQL prepared statements that
are required to retrieve a document from the database. */
-struct fts_get_doc_struct {
+struct fts_get_doc_t {
fts_index_cache_t*
index_cache; /*!< The index cache instance */
/*!< Parsed sql statement */
que_t* get_document_graph;
+ fts_cache_t* cache; /*!< The parent cache */
};
/** Since we can have multiple FTS indexes on a table, we keep a
per index cache of words etc. */
-struct fts_index_cache_struct {
+struct fts_index_cache_t {
dict_index_t* index; /*!< The FTS index instance */
ib_rbt_t* words; /*!< Nodes; indexed by fts_string_t*,
@@ -88,7 +82,7 @@ struct fts_index_cache_struct {
/** For supporting the tracking of updates on multiple FTS indexes we need
to track which FTS indexes need to be updated. For INSERT and DELETE we
update all fts indexes. */
-struct fts_update_struct {
+struct fts_update_t {
doc_id_t doc_id; /*!< The doc id affected */
ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be
@@ -100,7 +94,7 @@ struct fts_update_struct {
};
/** Stop word control infotmation. */
-struct fts_stopword_struct {
+struct fts_stopword_t {
ulint status; /*!< Status of the stopword tree */
ib_alloc_t* heap; /*!< The memory allocator to use */
ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */
@@ -109,7 +103,7 @@ struct fts_stopword_struct {
/** The SYNC state of the cache. There is one instance of this struct
associated with each ADD thread. */
-struct fts_sync_struct {
+struct fts_sync_t {
trx_t* trx; /*!< The transaction used for SYNCing
the cache to disk */
dict_table_t* table; /*!< Table with FTS index(es) */
@@ -131,12 +125,10 @@ struct fts_sync_struct {
ib_time_t start_time; /*!< SYNC start time */
};
-typedef struct fts_sync_struct fts_sync_t;
-
/** The cache for the FTS system. It is a memory-based inverted index
that new entries are added to, until it grows over the configured maximum
size, at which time its contents are written to the INDEX table. */
-struct fts_cache_struct {
+struct fts_cache_t {
rw_lock_t lock; /*!< lock protecting all access to the
memory buffer. FIXME: this needs to
be our new upgrade-capable rw-lock */
@@ -145,11 +137,11 @@ struct fts_cache_struct {
intialization, it has different
SYNC level as above cache lock */
- mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
+ ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
- mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
+ ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
- mutex_t doc_id_lock; /*!< Lock covering Doc ID */
+ ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each
element is of type fts_update_t */
@@ -200,7 +192,7 @@ struct fts_cache_struct {
};
/** Columns of the FTS auxiliary INDEX table */
-struct fts_node_struct {
+struct fts_node_t {
doc_id_t first_doc_id; /*!< First document id in ilist. */
doc_id_t last_doc_id; /*!< Last document id in ilist. */
@@ -223,7 +215,7 @@ struct fts_node_struct {
};
/** A tokenizer word. Contains information about one word. */
-struct fts_tokenizer_word_struct {
+struct fts_tokenizer_word_t {
fts_string_t text; /*!< Token text. */
ib_vector_t* nodes; /*!< Word node ilists, each element is
@@ -231,7 +223,7 @@ struct fts_tokenizer_word_struct {
};
/** Word text plus it's array of nodes as on disk in FTS index */
-struct fts_word_struct {
+struct fts_word_t {
fts_string_t text; /*!< Word value in UTF-8 */
ib_vector_t* nodes; /*!< Nodes read from disk */
@@ -239,7 +231,7 @@ struct fts_word_struct {
};
/** Callback for reading and filtering nodes that are read from FTS index */
-struct fts_fetch_struct {
+struct fts_fetch_t {
void* read_arg; /*!< Arg for the sql_callback */
fts_sql_callback
@@ -248,7 +240,7 @@ struct fts_fetch_struct {
};
/** For horizontally splitting an FTS auxiliary index */
-struct fts_index_selector_struct {
+struct fts_index_selector_t {
ulint value; /*!< Character value at which
to split */
@@ -256,7 +248,7 @@ struct fts_index_selector_struct {
};
/** This type represents a single document. */
-struct fts_doc_struct {
+struct fts_doc_t {
fts_string_t text; /*!< document text */
ibool found; /*!< TRUE if the document was found
@@ -276,7 +268,7 @@ struct fts_doc_struct {
};
/** A token and its positions within a document. */
-struct fts_token_struct {
+struct fts_token_t {
fts_string_t text; /*!< token text */
ib_vector_t* positions; /*!< an array of the positions the
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index 2734a331a86..b96c3f9dac8 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -37,46 +37,6 @@ extern const ulint UTF8_ERROR;
#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
/******************************************************************//**
-Compare two fts_trx_table_t instances.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
-
- return((table1->id > table2->id)
- ? 1
- : (table1->id == table2->id)
- ? 0
- : -1);
-}
-
-/******************************************************************//**
-Compare a table id with a fts_trx_table_t table id.
-@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const ullint* table_id = (const ullint*) p1;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
-
- return((*table_id > table2->id)
- ? 1
- : (*table_id == table2->id)
- ? 0
- : -1);
-}
-
-/******************************************************************//**
Duplicate an UTF-8 string.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
UNIV_INLINE
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index 1a2b8dac014..2e4397ea5fc 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -221,10 +221,7 @@ ha_print_info(
#endif /* !UNIV_HOTBACKUP */
/** The hash table external chain node */
-typedef struct ha_node_struct ha_node_t;
-
-/** The hash table external chain node */
-struct ha_node_struct {
+struct ha_node_t {
ha_node_t* next; /*!< next chain node or NULL if none */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block; /*!< buffer block containing the data, or NULL */
diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h
index caf42abfcfe..0073930b502 100644
--- a/storage/innobase/include/ha0storage.h
+++ b/storage/innobase/include/ha0storage.h
@@ -39,7 +39,7 @@ constant per ha_storage's lifetime. */
#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
/** Hash storage */
-typedef struct ha_storage_struct ha_storage_t;
+struct ha_storage_t;
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
index ce6e7406b43..7150ca045ec 100644
--- a/storage/innobase/include/ha0storage.ic
+++ b/storage/innobase/include/ha0storage.ic
@@ -31,7 +31,7 @@ Created September 24, 2007 Vasil Dimov
#include "mem0mem.h"
/** Hash storage for strings */
-struct ha_storage_struct {
+struct ha_storage_t {
mem_heap_t* heap; /*!< memory heap from which memory is
allocated */
hash_table_t* hash; /*!< hash table used to avoid
@@ -39,9 +39,7 @@ struct ha_storage_struct {
};
/** Objects of this type are stored in ha_storage_t */
-typedef struct ha_storage_node_struct ha_storage_node_t;
-/** Objects of this type are stored in ha_storage_struct */
-struct ha_storage_node_struct {
+struct ha_storage_node_t {
ulint data_len;/*!< length of the data */
const void* data; /*!< pointer to data */
ha_storage_node_t* next; /*!< next node in hash chain */
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 5512bf7c62f..fb4b0120bbb 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,15 +28,19 @@ Created 5/11/2006 Osku Salerma
#define HA_INNODB_PROTOTYPES_H
#include "my_dbug.h"
+#include "mysqld_error.h"
#include "my_compare.h"
#include "my_sys.h"
#include "m_string.h"
+#include "debug_sync.h"
+#include "my_base.h"
#include "trx0types.h"
#include "m_ctype.h" /* CHARSET_INFO */
-// Forward declaration
-typedef struct fts_string_struct fts_string_t;
+// Forward declarations
+class Field;
+struct fts_string_t;
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@@ -105,7 +109,7 @@ innobase_convert_name(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id);/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
@@ -120,7 +124,19 @@ UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Returns true if the transaction this thread is processing has edited
@@ -132,7 +148,7 @@ UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
@@ -141,21 +157,10 @@ void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
+ THD* thd, /*!< in: pointer to a MySQL THD object */
uint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
-/*****************************************************************//**
-Log code calls this whenever log has been written and/or flushed up
-to a new position. We use this to notify upper layer of a new commit
-checkpoint when necessary.*/
-UNIV_INTERN
-void
-innobase_mysql_log_notify(
-/*===============*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
- ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
-
/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them.
@@ -173,6 +178,18 @@ innobase_mysql_cmp(
unsigned int b_length) /*!< in: data field length,
not UNIV_SQL_NULL */
__attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Log code calls this whenever log has been written and/or flushed up
+to a new position. We use this to notify upper layer of a new commit
+checkpoint when necessary.*/
+extern "C" UNIV_INTERN
+void
+innobase_mysql_log_notify(
+/*===============*/
+ ib_uint64_t write_lsn, /*!< in: LSN written to log file */
+ ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
+
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
@@ -233,11 +250,11 @@ innobase_basename(
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
@return true if thd is executing SELECT */
-
+UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- const void* thd); /*!< in: thread handle (THD*) */
+ const THD* thd); /*!< in: thread handle */
/******************************************************************//**
Converts an identifier to a table name. */
@@ -276,7 +293,7 @@ UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- void* mysql_thd); /*!< in: MySQL thread handle */
+ THD* thd); /*!< in: MySQL thread handle */
/**********************************************************************//**
Determines the current SQL statement.
@return SQL statement string */
@@ -284,7 +301,7 @@ UNIV_INTERN
const char*
innobase_get_stmt(
/*==============*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
size_t* length) /*!< out: length of the SQL statement */
__attribute__((nonnull));
/******************************************************************//**
@@ -321,17 +338,17 @@ UNIV_INTERN
ibool
thd_supports_xa(
/*============*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_supports_xa */
/******************************************************************//**
Returns the lock wait timeout for the current connection.
@return the lock wait timeout, in seconds */
-
+UNIV_INTERN
ulong
thd_lock_wait_timeout(
/*==================*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
/******************************************************************//**
Add up the time waited for the lock for the current query. */
@@ -339,7 +356,7 @@ UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
- void* thd, /*!< in: thread handle (THD*) */
+ THD* thd, /*!< in/out: thread handle */
ulint value); /*!< in: time waited for the lock */
/**********************************************************************//**
@@ -363,6 +380,15 @@ ulint
innobase_get_lower_case_table_names(void);
/*=====================================*/
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+ THD* thd); /*!< in: MySQL thread handle for
+ which to close the connection */
/*************************************************************//**
Get the next token from the given string and store it in *token. */
UNIV_INTERN
@@ -414,7 +440,7 @@ UNIV_INTERN
ibool
thd_trx_is_read_only(
/*=================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in/out: thread handle */
/******************************************************************//**
Check if the transaction is an auto-commit transaction. TRUE also
@@ -424,5 +450,139 @@ UNIV_INTERN
ibool
thd_trx_is_auto_commit(
/*===================*/
- void* thd); /*!< in: thread handle (THD*) can be NULL */
+ THD* thd); /*!< in: thread handle, or NULL */
+
+/*****************************************************************//**
+A wrapper function of innobase_convert_name(), convert a table or
+index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
+@return pointer to the end of buf */
+UNIV_INTERN
+void
+innobase_format_name(
+/*==================*/
+ char* buf, /*!< out: buffer for converted
+ identifier */
+ ulint buflen, /*!< in: length of buf, in bytes */
+ const char* name, /*!< in: index or table name
+ to format */
+ ibool is_index_name) /*!< in: index name */
+ __attribute__((nonnull));
+
+/** Corresponds to Sql_condition::enum_warning_level. */
+enum ib_log_level_t {
+ IB_LOG_LEVEL_INFO,
+ IB_LOG_LEVEL_WARN,
+ IB_LOG_LEVEL_ERROR,
+ IB_LOG_LEVEL_FATAL
+};
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 4, 5)));
+
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ ...); /*!< Args */
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: ".
+Wrapper around sql_print_information() */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+ ib_log_level_t level, /*!< in: warning level */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 2, 3)));
+
+/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname();
+/*=================*/
+
+/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+ int error_code); /*!< in: MySQL error code */
+
+/*********************************************************************//**
+Compute the next autoinc value.
+
+For MySQL replication the autoincrement values can be partitioned among
+the nodes. The offset is the start or origin of the autoincrement value
+for a particular node. For n nodes the increment will be n and the offset
+will be in the interval [1, n]. The formula tries to allocate the next
+value for a particular node.
+
+Note: This function is also called with increment set to the number of
+values we want to reserve for multi-value inserts e.g.,
+
+ INSERT INTO T VALUES(), (), ();
+
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
+@return the next value */
+UNIV_INTERN
+ulonglong
+innobase_next_autoinc(
+/*==================*/
+ ulonglong current, /*!< in: Current value */
+ ulonglong need, /*!< in: count of values needed */
+ ulonglong step, /*!< in: AUTOINC increment step */
+ ulonglong offset, /*!< in: AUTOINC offset */
+ ulonglong max_value) /*!< in: max value for type */
+ __attribute__((pure, warn_unused_result));
+
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+ const Field* field) /*!< in: MySQL field */
+ __attribute__((nonnull, pure, warn_unused_result));
+
#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index c5d439ef21b..52aaf2d25ef 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,11 +27,34 @@ UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets); /*!< in: rec_get_offsets(
- rec, index, ...) */
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(
+ rec, index, ...) */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_index_t* index, /*!< in: InnoDB index */
+ const dfield_t* fields) /*!< in: InnoDB index fields */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_table_t* itab, /*!< in: InnoDB table */
+ const dtuple_t* row) /*!< in: InnoDB row */
+ __attribute__((nonnull));
/*************************************************************//**
Resets table->record[0]. */
@@ -39,4 +62,53 @@ UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
- struct TABLE* table); /*!< in/out: MySQL table */
+ struct TABLE* table) /*!< in/out: MySQL table */
+ __attribute__((nonnull));
+
+/** Generate the next autoinc based on a snapshot of the session
+auto_increment_increment and auto_increment_offset variables. */
+struct ib_sequence_t {
+
+ /**
+ @param thd - the session
+ @param start_value - the lower bound
+ @param max_value - the upper bound (inclusive) */
+ ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
+
+ /**
+ Postfix increment
+ @return the value to insert */
+ ulonglong operator++(int) UNIV_NOTHROW;
+
+ /** Check if the autoinc "sequence" is exhausted.
+ @return true if the sequence is exhausted */
+ bool eof() const UNIV_NOTHROW
+ {
+ return(m_eof);
+ }
+
+ /**
+ @return the next value in the sequence */
+ ulonglong last() const UNIV_NOTHROW
+ {
+ ut_ad(m_next_value > 0);
+
+ return(m_next_value);
+ }
+
+	/** Maximum column value if adding an AUTOINC column else 0. Once
+ we reach the end of the sequence it will be set to ~0. */
+ const ulonglong m_max_value;
+
+ /** Value of auto_increment_increment */
+ ulong m_increment;
+
+ /** Value of auto_increment_offset */
+ ulong m_offset;
+
+ /** Next value in the sequence */
+ ulonglong m_next_value;
+
+ /** true if no more values left in the sequence */
+ bool m_eof;
+};
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index 1c19ea53a23..6f9a628df5d 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -33,8 +33,8 @@ Created 5/20/1997 Heikki Tuuri
# include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
-typedef struct hash_table_struct hash_table_t;
-typedef struct hash_cell_struct hash_cell_t;
+struct hash_table_t;
+struct hash_cell_t;
typedef void* hash_node_t;
@@ -382,7 +382,7 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
@@ -400,7 +400,7 @@ hash_get_nth_lock(
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
@@ -451,7 +451,7 @@ void
hash_mutex_exit_all_but(
/*====================*/
hash_table_t* table, /*!< in: hash table */
- mutex_t* keep_mutex); /*!< in: mutex to keep */
+ ib_mutex_t* keep_mutex); /*!< in: mutex to keep */
/************************************************************//**
s-lock a lock for a fold value in a hash table. */
UNIV_INTERN
@@ -524,12 +524,12 @@ hash_unlock_x_all_but(
# define hash_unlock_x_all_but(t, l) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
-struct hash_cell_struct{
+struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
};
/* The hash table structure */
-struct hash_table_struct {
+struct hash_table_t {
enum hash_table_sync_t type; /*<! type of hash_table. */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
@@ -547,7 +547,7 @@ struct hash_table_struct {
rw_locks depending on the type.
Must be a power of 2 */
union {
- mutex_t* mutexes;/* NULL, or an array of mutexes
+ ib_mutex_t* mutexes;/* NULL, or an array of mutexes
used to protect segments of the
hash table */
rw_lock_t* rw_locks;/* NULL, or an array of rw_lcoks
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
index 1e5474601d5..254f3f82e5d 100644
--- a/storage/innobase/include/hash0hash.ic
+++ b/storage/innobase/include/hash0hash.ic
@@ -150,7 +150,7 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
@@ -168,7 +168,7 @@ hash_get_nth_mutex(
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index f405ebf8d11..e64f067d364 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -376,24 +376,16 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
UNIV_INTERN
ulint
-ibuf_contract(
-/*==========*/
- ibool sync); /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
ibuf_contract_in_background(
/*========================*/
- ibool full); /*!< in: TRUE if the caller wants to do a full
- contract based on PCT_IO(100). If FALSE then
- the size of contract batch is determined based
- on the current size of the ibuf tree. */
+ table_id_t table_id, /*!< in: if merge should be done only
+ for a specific table, for all tables
+ this should be 0 */
+ ibool full); /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
@@ -449,6 +441,17 @@ void
ibuf_close(void);
/*============*/
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+ __attribute__((nonnull, warn_unused_result));
+
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
@@ -462,36 +465,6 @@ for the file segment from which the pages for the ibuf tree are allocated */
/* The insert buffer tree itself is always located in space 0. */
#define IBUF_SPACE_ID 0
-/** Insert buffer struct */
-struct ibuf_struct{
- ulint size; /*!< current size of the ibuf index
- tree, in pages */
- ulint max_size; /*!< recommended maximum size of the
- ibuf index tree, in pages */
- ulint seg_size; /*!< allocated pages of the file
- segment containing ibuf header and
- tree */
- ibool empty; /*!< Protected by the page
- latch of the root page of the
- insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
- if and only if the insert
- buffer tree is empty. */
- ulint free_list_len; /*!< length of the free list */
- ulint height; /*!< tree height */
- dict_index_t* index; /*!< insert buffer index */
-
- ulint n_merges; /*!< number of pages merged */
- ulint n_merged_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- merged to index pages */
- ulint n_discarded_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- discarded without merging due to the
- tablespace being deleted or the
- index being dropped */
-};
-
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 8a4ec633b01..92ca2cbb9a2 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -58,6 +58,36 @@ ibuf_mtr_commit(
mtr_commit(mtr);
}
+/** Insert buffer struct */
+struct ibuf_t{
+ ulint size; /*!< current size of the ibuf index
+ tree, in pages */
+ ulint max_size; /*!< recommended maximum size of the
+ ibuf index tree, in pages */
+ ulint seg_size; /*!< allocated pages of the file
+ segment containing ibuf header and
+ tree */
+ ibool empty; /*!< Protected by the page
+ latch of the root page of the
+ insert buffer tree
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ if and only if the insert
+ buffer tree is empty. */
+ ulint free_list_len; /*!< length of the free list */
+ ulint height; /*!< tree height */
+ dict_index_t* index; /*!< insert buffer index */
+
+ ulint n_merges; /*!< number of pages merged */
+ ulint n_merged_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ merged to index pages */
+ ulint n_discarded_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ discarded without merging due to the
+ tablespace being deleted or the
+ index being dropped */
+};
+
/************************************************************************//**
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
@@ -97,6 +127,7 @@ ibuf_should_try(
return(ibuf_use != IBUF_USE_NONE
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
+ && index->table->quiesce == QUIESCE_NONE
&& (ignore_sec_unique || !dict_index_is_unique(index)));
}
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
index e404b62a011..3fdbf078b0b 100644
--- a/storage/innobase/include/ibuf0types.h
+++ b/storage/innobase/include/ibuf0types.h
@@ -26,6 +26,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
-typedef struct ibuf_struct ibuf_t;
+struct ibuf_t;
#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
index 42b4f7281e4..0054850b526 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innobase/include/lock0iter.h
@@ -29,13 +29,13 @@ Created July 16, 2007 Vasil Dimov
#include "univ.i"
#include "lock0types.h"
-typedef struct lock_queue_iterator_struct {
+struct lock_queue_iterator_t {
const lock_t* current_lock;
/* In case this is a record lock queue (not table lock queue)
then bit_no is the record number within the heap in which the
record is stored. */
ulint bit_no;
-} lock_queue_iterator_t;
+};
/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index a1ffe87d5bd..8e6fdaed3d5 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -275,7 +275,7 @@ the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
@@ -285,10 +285,11 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit);/*!< out: set to TRUE if the new
+ ibool* inherit)/*!< out: set to TRUE if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
@@ -298,7 +299,7 @@ lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -308,13 +309,14 @@ lock_clust_rec_modify_check_and_lock(
modified */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((warn_unused_result, nonnull));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify
(delete mark or delete unmark) of a secondary index record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -326,15 +328,17 @@ lock_sec_rec_modify_check_and_lock(
clustered index record first: see the
comment below */
dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((warn_unused_result, nonnull(2,3,4,6)));
/*********************************************************************//**
Like lock_clust_rec_read_check_and_lock(), but reads a
secondary index record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -364,7 +368,7 @@ lock on the record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -395,7 +399,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -413,13 +417,14 @@ lock_clust_rec_read_check_and_lock_alt(
SELECT FOR UPDATE */
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
+@return true if sees, or false if an earlier version of the record
should be retrieved */
UNIV_INTERN
-ibool
+bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
@@ -431,26 +436,27 @@ lock_clust_rec_cons_read_sees(
Checks that a non-clustered index record is seen in a consistent read.
NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
+its modifications that also in the case false, the present version of
rec may be the right, but we must check this from the clustered index
record.
-@return TRUE if certainly sees, or FALSE if an earlier version of the
+@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
UNIV_INTERN
-ulint
+bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
should be read or passed over
by a read cursor */
- const read_view_t* view); /*!< in: consistent read view */
+ const read_view_t* view) /*!< in: consistent read view */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_table(
/*=======*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -458,7 +464,8 @@ lock_table(
dict_table_t* table, /*!< in/out: database table
in dictionary cache */
enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
@@ -780,7 +787,7 @@ was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
lock_trx_handle_wait(
/*=================*/
trx_t* trx) /*!< in/out: trx lock state */
@@ -864,29 +871,35 @@ lock_trx_has_sys_table_locks(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+#define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock is created
+ by other transaction */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_MODE_MASK
# error
#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK
# error
#endif
/* @} */
+/** Checks if this is a waiting lock created by lock->trx itself.
+@param type_mode lock->type_mode
+@return whether it is a waiting lock belonging to lock->trx */
+#define lock_is_wait_not_by_other(type_mode) \
+ ((type_mode & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
+
/** Lock operation struct */
-typedef struct lock_op_struct lock_op_t;
-/** Lock operation struct */
-struct lock_op_struct{
+struct lock_op_t{
dict_table_t* table; /*!< table to be locked */
enum lock_mode mode; /*!< lock mode */
};
/** The lock system struct */
-struct lock_sys_struct{
- mutex_t mutex; /*!< Mutex protecting the
+struct lock_sys_t{
+ ib_mutex_t mutex; /*!< Mutex protecting the
locks */
hash_table_t* rec_hash; /*!< hash table of the record
locks */
- mutex_t wait_mutex; /*!< Mutex protecting the
+ ib_mutex_t wait_mutex; /*!< Mutex protecting the
next two fields */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
@@ -901,6 +914,16 @@ struct lock_sys_struct{
recovered transactions is
complete. Protected by
lock_sys->mutex */
+
+ ulint n_lock_max_wait_time; /*!< Max wait time */
+
+ os_event_t timeout_event; /*!< Set to the event that is
+ created in the lock wait monitor
+ thread. A value of 0 means the
+ thread is not active */
+
+ bool timeout_thread_active; /*!< True if the timeout thread
+ is running */
};
/** The lock system */
@@ -935,14 +958,6 @@ extern lock_sys_t* lock_sys;
mutex_exit(&lock_sys->wait_mutex); \
} while (0)
-// FIXME: Move these to lock_sys_t
-extern ibool srv_lock_timeout_active;
-extern ulint srv_n_lock_wait_count;
-extern ulint srv_n_lock_wait_current_count;
-extern ib_int64_t srv_n_lock_wait_time;
-extern ulint srv_n_lock_max_wait_time;
-extern os_event_t srv_lock_timeout_thread_event;
-
#ifndef UNIV_NONINL
#include "lock0lock.ic"
#endif
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index d516289e1f2..9f7ab9f76b6 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -40,9 +40,7 @@ those functions in lock/ */
#include "ut0lst.h"
/** A table lock */
-typedef struct lock_table_struct lock_table_t;
-/** A table lock */
-struct lock_table_struct {
+struct lock_table_t {
dict_table_t* table; /*!< database table in dictionary
cache */
UT_LIST_NODE_T(lock_t)
@@ -51,9 +49,7 @@ struct lock_table_struct {
};
/** Record lock for a page */
-typedef struct lock_rec_struct lock_rec_t;
-/** Record lock for a page */
-struct lock_rec_struct {
+struct lock_rec_t {
ulint space; /*!< space id */
ulint page_no; /*!< page number */
ulint n_bits; /*!< number of bits in the lock
@@ -63,7 +59,7 @@ struct lock_rec_struct {
};
/** Lock struct; protected by lock_sys->mutex */
-struct lock_struct {
+struct lock_t {
trx_t* trx; /*!< transaction owning the
lock */
UT_LIST_NODE_T(lock_t)
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 16e6b2e0113..cf32e72f864 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -27,8 +27,8 @@ Created 5/7/1996 Heikki Tuuri
#define lock0types_h
#define lock_t ib_lock_t
-typedef struct lock_struct lock_t;
-typedef struct lock_sys_struct lock_sys_t;
+struct lock_t;
+struct lock_sys_t;
/* Basic lock modes */
enum lock_mode {
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 5d72c7a96da..dd5e37012b7 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -48,9 +48,9 @@ typedef ib_uint64_t lsn_t;
#define LSN_PF UINT64PF
/** Redo log buffer */
-typedef struct log_struct log_t;
+struct log_t;
/** Redo log group */
-typedef struct log_group_struct log_group_t;
+struct log_group_t;
#ifdef UNIV_DEBUG
/** Flag: write to log file? */
@@ -67,7 +67,7 @@ extern ibool log_debug_writes;
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
/* @} */
-/** Maximum number of log groups in log_group_struct::checkpoint_buf */
+/** Maximum number of log groups in log_group_t::checkpoint_buf */
#define LOG_MAX_N_GROUPS 32
/*******************************************************************//**
@@ -161,6 +161,14 @@ UNIV_INLINE
lsn_t
log_get_capacity(void);
/*==================*/
+/****************************************************************
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void);
+/*================================*/
/******************************************************//**
Initializes the log. */
UNIV_INTERN
@@ -223,15 +231,6 @@ void
log_buffer_sync_in_background(
/*==========================*/
ibool flush); /*<! in: flush the logs to disk */
-/****************************************************************//**
-Checks if an asynchronous flushing of dirty pages is required in the
-background. This function is only called from the page cleaner thread.
-@return lsn to which the flushing should happen or LSN_MAX
-if flushing is not required */
-UNIV_INTERN
-lsn_t
-log_async_flush_lsn(void);
-/*=====================*/
/******************************************************//**
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
@@ -550,13 +549,19 @@ UNIV_INTERN
void
log_refresh_stats(void);
/*===================*/
-/**********************************************************
+/********************************************************//**
+Closes all log groups. */
+UNIV_INTERN
+void
+log_group_close_all(void);
+/*=====================*/
+/********************************************************//**
Shutdown the log system but do not release all the memory. */
UNIV_INTERN
void
log_shutdown(void);
/*==============*/
-/**********************************************************
+/********************************************************//**
Free the log system data structures. */
UNIV_INTERN
void
@@ -712,7 +717,7 @@ extern log_t* log_sys;
/** Log group consists of a number of log files, each of the same size; a log
group is implemented as a space in the sense of the module fil0fil. */
-struct log_group_struct{
+struct log_group_t{
/* The following fields are protected by log_sys->mutex */
ulint id; /*!< log group id */
ulint n_files; /*!< number of files in the group */
@@ -764,7 +769,7 @@ struct log_group_struct{
};
/** Redo log buffer */
-struct log_struct{
+struct log_t{
byte pad[64]; /*!< padding to prevent other memory
update hotspots from residing on the
same memory cache line */
@@ -772,9 +777,9 @@ struct log_struct{
ulint buf_free; /*!< first free offset within the log
buffer */
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the log */
+ ib_mutex_t mutex; /*!< mutex protecting the log */
- mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
+ ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index ad7b7e790a2..67792395ac9 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -446,6 +446,18 @@ log_get_capacity(void)
return(log_sys->log_group_capacity);
}
+/****************************************************************
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void)
+/*================================*/
+{
+ return(log_sys->max_modified_age_async);
+}
+
/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 218298a1698..dcdd4bdd8aa 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -128,7 +128,7 @@ recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
#ifdef UNIV_LOG_ARCHIVE
@@ -212,18 +212,18 @@ UNIV_INTERN
void
recv_reset_logs(
/*============*/
- lsn_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/*!< in: TRUE if resetting logs
+ ibool new_logs_created,/*!< in: TRUE if resetting logs
is done at the log creation;
FALSE if it is done after
archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t lsn); /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_HOTBACKUP
/******************************************************//**
Creates new log files after a backup has been restored. */
@@ -318,9 +318,7 @@ recv_recovery_from_archive_finish(void);
#endif /* UNIV_LOG_ARCHIVE */
/** Block of log record data */
-typedef struct recv_data_struct recv_data_t;
-/** Block of log record data */
-struct recv_data_struct{
+struct recv_data_t{
recv_data_t* next; /*!< pointer to the next block or NULL */
/*!< the log record data is stored physically
immediately after this struct, max amount
@@ -328,9 +326,7 @@ struct recv_data_struct{
};
/** Stored log record struct */
-typedef struct recv_struct recv_t;
-/** Stored log record struct */
-struct recv_struct{
+struct recv_t{
byte type; /*!< log record type */
ulint len; /*!< log record body length in bytes */
recv_data_t* data; /*!< chain of blocks containing the log record
@@ -347,7 +343,7 @@ struct recv_struct{
rec_list;/*!< list of log records for this page */
};
-/** States of recv_addr_struct */
+/** States of recv_addr_t */
enum recv_addr_state {
/** not yet processed */
RECV_NOT_PROCESSED,
@@ -361,9 +357,7 @@ enum recv_addr_state {
};
/** Hashed page file address struct */
-typedef struct recv_addr_struct recv_addr_t;
-/** Hashed page file address struct */
-struct recv_addr_struct{
+struct recv_addr_t{
enum recv_addr_state state;
/*!< recovery state of the page */
unsigned space:32;/*!< space id */
@@ -374,13 +368,14 @@ struct recv_addr_struct{
};
/** Recovery system data structure */
-typedef struct recv_sys_struct recv_sys_t;
-/** Recovery system data structure */
-struct recv_sys_struct{
+struct recv_sys_t{
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
+ ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
n_addrs, and the state field in each recv_addr
struct */
+ ib_mutex_t writer_mutex;/*!< mutex coordinating
+ flushing between recv_writer_thread and
+ the recovery thread. */
#endif /* !UNIV_HOTBACKUP */
ibool apply_log_recs;
/*!< this is TRUE when log rec application to
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index 3066070ef39..d0087f56aaa 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -374,6 +374,40 @@ mach_read_int_type(
const byte* src, /*!< in: where to read from */
ulint len, /*!< in: length of src */
ibool unsigned_type); /*!< in: signed or unsigned flag */
+/***********************************************************//**
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< in: where to write*/
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign); /*!< in: signed or unsigned flag */
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< in: where to write */
+ ulonglong src, /*!< in: where to read from */
+ ulint len, /*!< in: length of dest */
+ bool usign); /*!< in: signed or unsigned flag */
+
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+
#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index ec1a28bca47..fffef87f09d 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -776,5 +776,104 @@ mach_read_int_type(
return(ret);
}
+/*********************************************************//**
+Swap byte ordering. */
+UNIV_INLINE
+void
+mach_swap_byte_order(
+/*=================*/
+ byte* dest, /*!< out: where to write */
+ const byte* from, /*!< in: where to read from */
+ ulint len) /*!< in: length of src */
+{
+ ut_ad(len > 0);
+ ut_ad(len <= 8);
+
+ dest += len;
+
+ switch (len & 0x7) {
+ case 0: *--dest = *from++;
+ case 7: *--dest = *from++;
+ case 6: *--dest = *from++;
+ case 5: *--dest = *from++;
+ case 4: *--dest = *from++;
+ case 3: *--dest = *from++;
+ case 2: *--dest = *from++;
+ case 1: *--dest = *from;
+ }
+}
+
+/*************************************************************
+Convert integral type from host byte order (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< in: where to write */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign) /*!< in: signed or unsigned flag */
+{
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, src, len);
+#else
+ mach_swap_byte_order(dest, src, len);
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80;
+ }
+}
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< in: where to write */
+ ulonglong src, /*!< in: where to read from */
+ ulint len, /*!< in: length of dest */
+ bool usign) /*!< in: signed or unsigned flag */
+{
+ byte* ptr = reinterpret_cast<byte*>(&src);
+
+ ut_ad(len <= sizeof(ulonglong));
+
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, ptr + (sizeof(src) - len), len);
+#else
+ mach_swap_byte_order(dest, reinterpret_cast<byte*>(ptr), len);
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80;
+ }
+}
+
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type) /*!< in: 1,2 or 4 bytes */
+{
+ switch (type) {
+ case 1:
+ return(mach_read_from_1(ptr));
+ case 2:
+ return(mach_read_from_2(ptr));
+ case 4:
+ return(mach_read_from_4(ptr));
+ default:
+ ut_error;
+ }
+}
+
#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
index 9f95e84c81e..cc339b82910 100644
--- a/storage/innobase/include/mem0dbg.h
+++ b/storage/innobase/include/mem0dbg.h
@@ -32,7 +32,7 @@ check fields whose sizes are given below */
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables in mem0dbg.cc. */
-extern mutex_t mem_hash_mutex;
+extern ib_mutex_t mem_hash_mutex;
# endif /* !UNIV_HOTBACKUP */
#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index 6851a5bc01b..c36ef06b554 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -38,15 +38,12 @@ Created 6/9/1994 Heikki Tuuri
/* -------------------- MEMORY HEAPS ----------------------------- */
-/* The info structure stored at the beginning of a heap block */
-typedef struct mem_block_info_struct mem_block_info_t;
-
/* A block of a memory heap consists of the info structure
followed by an area of memory */
-typedef mem_block_info_t mem_block_t;
+typedef struct mem_block_info_t mem_block_t;
/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
+typedef mem_block_t mem_heap_t;
/* Types of allocation for memory heaps: DYNAMIC means allocation from the
dynamic memory pool of the C compiler, BUFFER means allocation from the
@@ -343,9 +340,8 @@ mem_validate_all_blocks(void);
/*#######################################################################*/
-/* The info header of a block in a memory heap */
-
-struct mem_block_info_struct {
+/** The info structure stored at the beginning of a heap block */
+struct mem_block_info_t {
ulint magic_n;/* magic number for debugging */
char file_name[8];/* file name where the mem heap was created */
ulint line; /*!< line number where the mem heap was created */
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index eee3806dd52..7f0e128cc40 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -247,16 +247,13 @@ mem_heap_free_heap_top(
{
mem_block_t* block;
mem_block_t* prev_block;
-#ifdef UNIV_MEM_DEBUG
+#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
ibool error;
ulint total_size;
ulint size;
-#endif
ut_ad(mem_heap_check(heap));
-#ifdef UNIV_MEM_DEBUG
-
/* Validate the heap and get its total allocated size */
mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
NULL, NULL);
@@ -294,9 +291,9 @@ mem_heap_free_heap_top(
/* Set the free field of block */
mem_block_set_free(block, old_top - (byte*) block);
-#ifdef UNIV_MEM_DEBUG
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
+ UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
+#if defined UNIV_MEM_DEBUG
/* In the debug version erase block from top up */
mem_erase_buf(old_top, (byte*) block + block->len - old_top);
@@ -304,8 +301,6 @@ mem_heap_free_heap_top(
mutex_enter(&mem_hash_mutex);
mem_current_allocated_memory -= (total_size - size);
mutex_exit(&mem_hash_mutex);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
#endif /* UNIV_MEM_DEBUG */
UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
index 451055e857f..a65ba50fdf9 100644
--- a/storage/innobase/include/mem0pool.h
+++ b/storage/innobase/include/mem0pool.h
@@ -30,17 +30,14 @@ Created 6/9/1994 Heikki Tuuri
#include "os0file.h"
#include "ut0lst.h"
-/** Memory area header */
-typedef struct mem_area_struct mem_area_t;
/** Memory pool */
-typedef struct mem_pool_struct mem_pool_t;
+struct mem_pool_t;
/** The common memory pool */
extern mem_pool_t* mem_comm_pool;
/** Memory area header */
-
-struct mem_area_struct{
+struct mem_area_t{
ulint size_and_free; /*!< memory area size is obtained by
anding with ~MEM_AREA_FREE; area in
a free list if ANDing with
@@ -50,7 +47,7 @@ struct mem_area_struct{
};
/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
+#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\
UNIV_MEM_ALIGNMENT))
/********************************************************************//**
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 1427a981bef..18a345d050f 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -32,8 +32,8 @@ Created 12/7/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
UNIV_INTERN
void
mlog_write_ulint(
@@ -43,8 +43,8 @@ mlog_write_ulint(
byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
UNIV_INTERN
void
mlog_write_ull(
@@ -217,12 +217,13 @@ UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size); /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size); /*!< in: requested buffer size in bytes
+ (if 0, calls mlog_close() and
+ returns NULL) */
#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index fd84f1119cc..f8c1874412c 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +40,7 @@ Created 11/26/1995 Heikki Tuuri
#define MTR_LOG_ALL 21 /* default mode: log all operations
modifying disk-based data */
#define MTR_LOG_NONE 22 /* log no operations */
+#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */
/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
file space page allocation data
(operations in fsp0fsp.* ) */
@@ -180,7 +182,11 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */
#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
header */
#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_BIGGEST_TYPE ((byte)51) /*!< biggest value (used in
+#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page
+ without logging it's image */
+#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53) /*!< reorganize a compressed
+ page */
+#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
assertions) */
/* @} */
@@ -358,15 +364,14 @@ mtr_memo_push(
void* object, /*!< in: object */
ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-/* Type definition of a mini-transaction memo stack slot. */
-typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
+/** Mini-transaction memo stack slot. */
+struct mtr_memo_slot_t{
ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
void* object; /*!< pointer to the object */
};
/* Mini-transaction handle and buffer */
-struct mtr_struct{
+struct mtr_t{
#ifdef UNIV_DEBUG
ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
#endif
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index dcd9826b380..bb24734c9bb 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -269,7 +269,7 @@ mtr_s_lock_func(
ut_ad(mtr);
ut_ad(lock);
- rw_lock_s_lock_func(lock, 0, file, line);
+ rw_lock_s_lock_inline(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
}
@@ -288,7 +288,7 @@ mtr_x_lock_func(
ut_ad(mtr);
ut_ad(lock);
- rw_lock_x_lock_func(lock, 0, file, line);
+ rw_lock_x_lock_inline(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
}
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 7a2bcefadb9..43368c0b726 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -26,6 +26,6 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0types_h
#define mtr0types_h
-typedef struct mtr_struct mtr_t;
+struct mtr_t;
#endif
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 8f84193cb0f..4a744c1b268 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -44,7 +44,7 @@ Created 10/21/1995 Heikki Tuuri
#endif
/** File node of a tablespace or the log data space */
-typedef struct fil_node_struct fil_node_t;
+struct fil_node_t;
extern ibool os_has_said_disk_full;
/** Flag: enable debug printout for asynchronous i/o */
@@ -102,7 +102,7 @@ log. */
#define OS_FILE_LOG_BLOCK_SIZE 512
/** Options for os_file_create_func @{ */
-typedef enum os_file_create_enum {
+enum os_file_create_t {
OS_FILE_OPEN = 51, /*!< to open an existing file (if
doesn't exist, error) */
OS_FILE_CREATE, /*!< to create new file (if
@@ -122,7 +122,7 @@ typedef enum os_file_create_enum {
the log unless it is a fatal error,
this flag is only used if
ON_ERROR_NO_EXIT is set */
-} os_file_create_t;
+};
#define OS_FILE_READ_ONLY 333
#define OS_FILE_READ_WRITE 444
@@ -217,10 +217,10 @@ used to register actual file read, write and flush */
# define register_pfs_file_open_begin(state, locker, key, op, name, \
src_file, src_line) \
do { \
- locker = PSI_CALL(get_thread_file_name_locker)( \
+ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
state, key, op, name, &locker); \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(start_file_open_wait)( \
+ PSI_FILE_CALL(start_file_open_wait)( \
locker, src_file, src_line); \
} \
} while (0)
@@ -228,7 +228,7 @@ do { \
# define register_pfs_file_open_end(locker, file) \
do { \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)( \
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
locker, file); \
} \
} while (0)
@@ -236,10 +236,10 @@ do { \
# define register_pfs_file_io_begin(state, locker, file, count, op, \
src_file, src_line) \
do { \
- locker = PSI_CALL(get_thread_file_descriptor_locker)( \
+ locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \
state, file, op); \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(start_file_wait)( \
+ PSI_FILE_CALL(start_file_wait)( \
locker, count, src_file, src_line); \
} \
} while (0)
@@ -247,7 +247,7 @@ do { \
# define register_pfs_file_io_end(locker, count) \
do { \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(end_file_wait)(locker, count); \
+ PSI_FILE_CALL(end_file_wait)(locker, count); \
} \
} while (0)
#endif /* UNIV_PFS_IO */
@@ -345,13 +345,12 @@ to original un-instrumented file I/O APIs */
/* File types for directory entry data type */
-enum os_file_type_enum{
+enum os_file_type_t {
OS_FILE_TYPE_UNKNOWN = 0,
OS_FILE_TYPE_FILE, /* regular file */
OS_FILE_TYPE_DIR, /* directory */
OS_FILE_TYPE_LINK /* symbolic link */
};
-typedef enum os_file_type_enum os_file_type_t;
/* Maximum path string length in bytes when referring to tables with in the
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
@@ -359,16 +358,18 @@ of this size from the thread stack; that is why this should not be made much
bigger than 4000 bytes */
#define OS_FILE_MAX_PATH 4000
-/* Struct used in fetching information of a file in a directory */
-struct os_file_stat_struct{
+/** Struct used in fetching information of a file in a directory */
+struct os_file_stat_t {
char name[OS_FILE_MAX_PATH]; /*!< path to a file */
os_file_type_t type; /*!< file type */
ib_int64_t size; /*!< file size */
time_t ctime; /*!< creation time */
time_t mtime; /*!< modification time */
time_t atime; /*!< access time */
+ bool rw_perm; /*!< true if can be opened
+ in read-write mode. Only valid
+ if type == OS_FILE_TYPE_FILE */
};
-typedef struct os_file_stat_struct os_file_stat_t;
#ifdef __WIN__
typedef HANDLE os_file_dir_t; /*!< directory stream */
@@ -525,7 +526,7 @@ os_file_create_func(
Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete(
/*===========*/
const char* name); /*!< in: file path as a null-terminated
@@ -535,7 +536,7 @@ os_file_delete(
Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete_if_exists(
/*=====================*/
const char* name); /*!< in: file path as a null-terminated
@@ -826,7 +827,7 @@ UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- ibool report_all_errors); /*!< in: TRUE if we want an error message
+ bool report_all_errors); /*!< in: TRUE if we want an error message
printed of all errors */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_read(), not directly this function!
@@ -925,6 +926,60 @@ os_file_dirname(
/*============*/
const char* path); /*!< in: pathname */
/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+ const char* old_path, /*!< in: pathname */
+ const char* new_name); /*!< in: new file name */
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'. It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided. The filename is the tablename with the '.ibd'
+extension. All input and output strings are null-terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+ const char* data_dir_path, /*!< in: pathname */
+ const char* tablename, /*!< in: tablename */
+ const char* extention); /*!< in: file extention; ibd,cfg*/
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+ char* data_dir_path); /*!< in/out: full path/data_dir_path */
+/****************************************************************//**
Creates all missing subdirectories along the given path.
@return TRUE if call succeeded FALSE otherwise */
UNIV_INTERN
@@ -1108,14 +1163,16 @@ os_aio_all_slots_free(void);
/*******************************************************************//**
This function returns information about the specified file
-@return TRUE if stat information found */
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-ibool
+dberr_t
os_file_get_status(
/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info); /*!< information of a file in a
+ const char* path, /*!< in: pathname of the file */
+ os_file_stat_t* stat_info, /*!< information of a file in a
directory */
+ bool check_rw_perm); /*!< in: for testing whether the
+ file can be opened in RW mode */
#if !defined(UNIV_HOTBACKUP)
/*********************************************************************//**
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index d68823b72ca..d3ce68253ec 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -54,22 +54,19 @@ typedef pthread_cond_t os_cond_t;
/** Structure that includes Performance Schema Probe pfs_psi
in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
-typedef struct os_fast_mutex_struct {
+struct os_fast_mutex_t {
fast_mutex_t mutex; /*!< os_fast_mutex */
#ifdef UNIV_PFS_MUTEX
struct PSI_mutex* pfs_psi;/*!< The performance schema
instrumentation hook */
#endif
-} os_fast_mutex_t;
-
+};
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
+typedef struct os_event* os_event_t;
/** An asynchronous signal sent between threads */
-struct os_event_struct {
+struct os_event {
#ifdef __WIN__
HANDLE handle; /*!< kernel event object, slow,
used on older Windows */
@@ -84,7 +81,7 @@ struct os_event_struct {
the event becomes signaled */
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+ UT_LIST_NODE_T(os_event_t) os_event_list;
/*!< list of all created events */
};
@@ -94,16 +91,11 @@ struct os_event_struct {
/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
-/** Operating system mutex */
-typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */
-typedef os_mutex_str_t* os_mutex_t;
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
+typedef struct os_mutex_t* os_ib_mutex_t;
/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t os_sync_mutex;
+extern os_ib_mutex_t os_sync_mutex;
/** This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
@@ -132,10 +124,8 @@ explicitly by calling sync_os_reset_event.
@return the event handle */
UNIV_INTERN
os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_create(void);
+/*==================*/
/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
@@ -191,7 +181,7 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(e, t) os_event_wait_time_low(event, t, 0)
+#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
@@ -210,10 +200,10 @@ os_event_wait_time_low(
os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
-os_mutex_t
+os_ib_mutex_t
os_mutex_create(void);
/*=================*/
/**********************************************************//**
@@ -222,21 +212,21 @@ UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex); /*!< in: mutex to acquire */
+ os_ib_mutex_t mutex); /*!< in: mutex to acquire */
/**********************************************************//**
Releases ownership of a mutex. */
UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to release */
+ os_ib_mutex_t mutex); /*!< in: mutex to release */
/**********************************************************//**
Frees an mutex object. */
UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to free */
+ os_ib_mutex_t mutex); /*!< in: mutex to free */
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
@@ -365,7 +355,11 @@ Atomic compare-and-swap and increment for InnoDB. */
#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+
+# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
+# define HAVE_ATOMIC_BUILTINS_64
+# endif
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
@@ -419,6 +413,9 @@ amount to decrement. */
# define os_atomic_decrement_ulint(ptr, amount) \
os_atomic_decrement(ptr, amount)
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_decrement(ptr, amount)
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
@@ -430,12 +427,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS_64
/* If not compiling with GCC or GCC doesn't support the atomic
intrinsics and running on Solaris >= 10 use Solaris atomics */
-#include <atomic.h>
+# include <atomic.h>
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
@@ -487,6 +485,9 @@ amount to decrement. */
# define os_atomic_decrement_ulint(ptr, amount) \
os_atomic_increment_ulint(ptr, -(amount))
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_increment_uint64(ptr, -(amount))
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
@@ -498,7 +499,11 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_WINDOWS_ATOMICS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+
+# ifndef _WIN32
+# define HAVE_ATOMIC_BUILTINS_64
+# endif
/**********************************************************//**
Atomic compare and exchange of signed integers (both 32 and 64 bit).
@@ -574,8 +579,10 @@ amount of increment. */
# define os_atomic_increment_ulint(ptr, amount) \
((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
-# define os_atomic_increment_uint64(ptr, amount) \
- ((ulint) (win_xchg_and_add(ptr, (lint) amount) + amount))
+# define os_atomic_increment_uint64(ptr, amount) \
+ ((ib_uint64_t) (InterlockedExchangeAdd64( \
+ (ib_int64_t*) ptr, \
+ (ib_int64_t) amount) + amount))
/**********************************************************//**
Returns the resulting value, ptr is pointer to target, amount is the
@@ -587,6 +594,11 @@ amount to decrement. There is no atomic substract function on Windows */
# define os_atomic_decrement_ulint(ptr, amount) \
((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
+# define os_atomic_decrement_uint64(ptr, amount) \
+ ((ib_uint64_t) (InterlockedExchangeAdd64( \
+ (ib_int64_t*) ptr, \
+ -(ib_int64_t) amount) - amount))
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val.
InterlockedExchange() operates on LONG, and the LONG will be
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
index 0d907b31366..33c238ceb47 100644
--- a/storage/innobase/include/os0sync.ic
+++ b/storage/innobase/include/os0sync.ic
@@ -66,7 +66,7 @@ pfs_os_fast_mutex_init(
os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- fast_mutex->pfs_psi = PSI_CALL(init_mutex)(key, &fast_mutex->mutex);
+ fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
#else
fast_mutex->pfs_psi = NULL;
#endif
@@ -86,7 +86,7 @@ pfs_os_fast_mutex_free(
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (fast_mutex->pfs_psi != NULL)
- PSI_CALL(destroy_mutex)(fast_mutex->pfs_psi);
+ PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
#endif
fast_mutex->pfs_psi = NULL;
@@ -112,13 +112,13 @@ pfs_os_fast_mutex_lock(
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi,
PSI_MUTEX_LOCK, file_name, line);
os_fast_mutex_lock_func(&fast_mutex->mutex);
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, 0);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
}
else
#endif
@@ -141,7 +141,7 @@ pfs_os_fast_mutex_unlock(
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (fast_mutex->pfs_psi != NULL)
- PSI_CALL(unlock_mutex)(fast_mutex->pfs_psi);
+ PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
#endif
os_fast_mutex_unlock_func(&fast_mutex->mutex);
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 52f5c5de58a..038a05edbd0 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -170,8 +170,11 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4,5), warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -238,10 +241,11 @@ UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Searches the right position for a page cursor.
@@ -331,10 +335,24 @@ page_cur_parse_delete_rec(
buf_block_t* block, /*!< in: page or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr or NULL */
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets);/*!< in: offsets for record */
/** Index page cursor */
-struct page_cur_struct{
+struct page_cur_t{
byte* rec; /*!< pointer to a record on page */
buf_block_t* block; /*!< pointer to the block containing rec */
};
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index a065f9ff30d..90a5a690487 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -245,33 +245,36 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- mem_heap_t* heap;
- ulint* offsets;
ulint size
= rec_get_converted_size(index, tuple, n_ext);
rec_t* rec;
- heap = mem_heap_create(size
- + (4 + REC_OFFS_HEADER_SIZE
- + dtuple_get_n_fields(tuple))
- * sizeof *offsets);
- rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
+ if (!*heap) {
+ *heap = mem_heap_create(size
+ + (4 + REC_OFFS_HEADER_SIZE
+ + dtuple_get_n_fields(tuple))
+ * sizeof **offsets);
+ }
+
+ rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, heap);
if (buf_block_get_page_zip(cursor->block)) {
rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr);
+ index, rec, *offsets, mtr);
} else {
rec = page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr);
+ index, rec, *offsets, mtr);
}
- ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets));
- mem_heap_free(heap);
+ ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
return(rec);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index e4571b69376..773ec4c2177 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -551,6 +551,16 @@ page_rec_get_next_const(
/*====================*/
const rec_t* rec); /*!< in: pointer to record */
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec); /*!< in: pointer to record */
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
@@ -737,11 +747,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the (origin of)
+ record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets);/*!< in: array returned by
+ rec_get_offsets() */
/**********************************************************//**
Create an uncompressed B-tree index page.
@return pointer to the page */
@@ -1031,7 +1044,6 @@ page_find_rec_with_heap_no(
/*=======================*/
const page_t* page, /*!< in: index page */
ulint heap_no);/*!< in: heap number */
-
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
#define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index e73e547e92b..c2e20d81a29 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -776,6 +776,30 @@ page_rec_get_next_const(
}
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec) /*!< in: pointer to record */
+{
+ const rec_t* r;
+ ulint page_is_compact = page_rec_is_comp(rec);
+
+ for (r = page_rec_get_next_const(rec);
+ !page_rec_is_supremum(r)
+ && rec_get_deleted_flag(r, page_is_compact);
+ r = page_rec_get_next_const(r)) {
+ /* noop */
+ }
+
+ return(r);
+}
+
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
@@ -1085,11 +1109,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the
+ (origin of) record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: array returned by
+ rec_get_offsets() */
{
rec_t* free;
ulint garbage;
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index da2ac1c7de2..533b0d3cf98 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -26,6 +26,10 @@ Created 2/2/1994 Heikki Tuuri
#ifndef page0types_h
#define page0types_h
+using namespace std;
+
+#include <map>
+
#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
@@ -35,12 +39,12 @@ Created 2/2/1994 Heikki Tuuri
/** Type of the index page */
typedef byte page_t;
/** Index page cursor */
-typedef struct page_cur_struct page_cur_t;
+struct page_cur_t;
/** Compressed index page */
typedef byte page_zip_t;
/** Compressed page descriptor */
-typedef struct page_zip_des_struct page_zip_des_t;
+struct page_zip_des_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
@@ -60,12 +64,14 @@ ssize, which is the number of shifts from 512. */
#endif
/** Compressed page descriptor */
-struct page_zip_des_struct
+struct page_zip_des_t
{
page_zip_t* data; /*!< compressed page data */
#ifdef UNIV_DEBUG
unsigned m_start:16; /*!< start offset of modification log */
+ bool m_external; /*!< Allocated externally, not from the
+ buffer pool */
#endif /* UNIV_DEBUG */
unsigned m_end:16; /*!< end offset of modification log */
unsigned m_nonempty:1; /*!< TRUE if the modification log
@@ -80,7 +86,7 @@ struct page_zip_des_struct
};
/** Compression statistics for a given page size */
-struct page_zip_stat_struct {
+struct page_zip_stat_t {
/** Number of page compressions */
ulint compressed;
/** Number of successful page compressions */
@@ -91,13 +97,29 @@ struct page_zip_stat_struct {
ib_uint64_t compressed_usec;
/** Duration of page decompressions in microseconds */
ib_uint64_t decompressed_usec;
+ page_zip_stat_t() :
+ /* Initialize members to 0 so that when we do
+ stlmap[key].compressed++ and element with "key" does not
+ exist it gets inserted with zeroed members. */
+ compressed(0),
+ compressed_ok(0),
+ decompressed(0),
+ compressed_usec(0),
+ decompressed_usec(0)
+ { }
};
-/** Compression statistics */
-typedef struct page_zip_stat_struct page_zip_stat_t;
-
-/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Compression statistics types */
+typedef map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t;
+
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by dict_index_t::id */
+extern page_zip_stat_per_index_t page_zip_stat_per_index;
+extern ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 2c84f75b2ab..12781bd61b8 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +40,16 @@ Created June 2005 by Marko Makela
#include "trx0types.h"
#include "mem0mem.h"
+/* Compression level to be used by zlib. Settable by user. */
+extern ulint page_compression_level;
+
+/* Default compression level. */
+#define DEFAULT_COMPRESSION_LEVEL 6
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+extern bool page_log_compressed_pages;
+
/**********************************************************************//**
Determine the size of a compressed page in bytes.
@return size in bytes */
@@ -114,6 +125,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: commpression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
__attribute__((nonnull(1,2,3)));
@@ -337,11 +349,12 @@ UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
__attribute__((nonnull(1,2,3,4)));
/**********************************************************************//**
@@ -461,14 +474,49 @@ page_zip_verify_checksum(
/*=====================*/
const void* data, /*!< in: compressed page */
ulint size); /*!< in: size of compressed page */
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index();
+/*===========================*/
#ifndef UNIV_HOTBACKUP
/** Check if a pointer to an uncompressed page matches a compressed page.
+When we IMPORT a tablespace the blocks and accompanying frames are allocted
+from outside the buffer pool.
@param ptr pointer to an uncompressed page frame
@param page_zip compressed page descriptor
@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (buf_frame_get_page_zip(ptr) == (page_zip))
+# define PAGE_ZIP_MATCH(ptr, page_zip) \
+ (((page_zip)->m_external \
+ && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \
+ || buf_frame_get_page_zip(ptr) == (page_zip))
#else /* !UNIV_HOTBACKUP */
/** Check if a pointer to an uncompressed page matches a compressed page.
@param ptr pointer to an uncompressed page frame
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index c9300aa4e9f..0062e1cb39f 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +30,7 @@ Created June 2005 by Marko Makela
#endif
#include "page0zip.h"
+#include "mtr0log.h"
#include "page0page.h"
/* The format of compressed pages is as follows.
@@ -389,6 +391,75 @@ page_zip_write_header(
}
}
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ byte* log_ptr = mlog_open_and_write_index(
+ mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
+
+ if (log_ptr) {
+ mach_write_to_1(log_ptr, level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+}
+
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+{
+ ulint level;
+ if (end_ptr == ptr) {
+ return(NULL);
+ }
+
+ level = mach_read_from_1(ptr);
+
+ /* If page compression fails then there must be something wrong
+ because a compress log record is logged only if the compression
+ was successful. Crash in this case. */
+
+ if (page
+ && !page_zip_compress(page_zip, page, index, level, NULL)) {
+ ut_error;
+ }
+
+ return(ptr + 1);
+}
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index()
+/*===========================*/
+{
+ mutex_enter(&page_zip_stat_per_index_mutex);
+
+ page_zip_stat_per_index.erase(
+ page_zip_stat_per_index.begin(),
+ page_zip_stat_per_index.end());
+
+ mutex_exit(&page_zip_stat_per_index_mutex);
+}
+
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 9eb8aeb747f..65ff7533828 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -469,9 +469,10 @@ que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
que_node_t* node, /*!< in: root node for an incomplete
- query graph */
+ query graph, or NULL for dummy graph */
trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(2,3), warn_unused_result));
/****************************************************************//**
Create parser info struct.
@@ -618,6 +619,18 @@ pars_info_add_ull_literal(
ib_uint64_t val); /*!< in: value */
/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_ull_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val) /*!< in: value */
+ __attribute__((nonnull));
+
+/****************************************************************//**
Add bound id. */
UNIV_INTERN
void
@@ -628,16 +641,6 @@ pars_info_add_id(
const char* id); /*!< in: id */
/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: function name to find*/
-
-/****************************************************************//**
Get bound literal with the given name.
@return bound literal, or NULL if not found */
UNIV_INTERN
@@ -665,7 +668,7 @@ pars_lexer_close(void);
/*==================*/
/** Extra information supplied for pars_sql(). */
-struct pars_info_struct {
+struct pars_info_t {
mem_heap_t* heap; /*!< our own memory heap */
ib_vector_t* funcs; /*!< user functions, or NUll
@@ -680,14 +683,14 @@ struct pars_info_struct {
};
/** User-supplied function and argument. */
-struct pars_user_func_struct {
+struct pars_user_func_t {
const char* name; /*!< function name */
pars_user_func_cb_t func; /*!< function address */
void* arg; /*!< user-supplied argument */
};
/** Bound literal. */
-struct pars_bound_lit_struct {
+struct pars_bound_lit_t {
const char* name; /*!< name */
const void* address; /*!< address */
ulint length; /*!< length of data */
@@ -697,20 +700,20 @@ struct pars_bound_lit_struct {
};
/** Bound identifier. */
-struct pars_bound_id_struct {
+struct pars_bound_id_t {
const char* name; /*!< name */
const char* id; /*!< identifier */
};
/** Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
+struct pars_res_word_t{
int code; /*!< the token code for the reserved word from
pars0grm.h */
};
/** A predefined function or operator node in a parsing tree; this construct
is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
+struct func_node_t{
que_common_t common; /*!< type: QUE_NODE_FUNC */
int func; /*!< token code of the function name */
ulint fclass; /*!< class of the function */
@@ -725,14 +728,14 @@ struct func_node_struct{
};
/** An order-by node in a select */
-struct order_node_struct{
+struct order_node_t{
que_common_t common; /*!< type: QUE_NODE_ORDER */
sym_node_t* column; /*!< order-by column */
ibool asc; /*!< TRUE if ascending, FALSE if descending */
};
/** Procedure definition node */
-struct proc_node_struct{
+struct proc_node_t{
que_common_t common; /*!< type: QUE_NODE_PROC */
sym_node_t* proc_id; /*!< procedure name symbol in the symbol
table of this same procedure */
@@ -742,14 +745,14 @@ struct proc_node_struct{
};
/** elsif-element node */
-struct elsif_node_struct{
+struct elsif_node_t{
que_common_t common; /*!< type: QUE_NODE_ELSIF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
};
/** if-statement node */
-struct if_node_struct{
+struct if_node_t{
que_common_t common; /*!< type: QUE_NODE_IF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
@@ -758,14 +761,14 @@ struct if_node_struct{
};
/** while-statement node */
-struct while_node_struct{
+struct while_node_t{
que_common_t common; /*!< type: QUE_NODE_WHILE */
que_node_t* cond; /*!< while condition */
que_node_t* stat_list; /*!< statement list */
};
/** for-loop-statement node */
-struct for_node_struct{
+struct for_node_t{
que_common_t common; /*!< type: QUE_NODE_FOR */
sym_node_t* loop_var; /*!< loop variable: this is the
dereferenced symbol from the
@@ -782,24 +785,24 @@ struct for_node_struct{
};
/** exit statement node */
-struct exit_node_struct{
+struct exit_node_t{
que_common_t common; /*!< type: QUE_NODE_EXIT */
};
/** return-statement node */
-struct return_node_struct{
+struct return_node_t{
que_common_t common; /*!< type: QUE_NODE_RETURN */
};
/** Assignment statement node */
-struct assign_node_struct{
+struct assign_node_t{
que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
sym_node_t* var; /*!< variable to set */
que_node_t* val; /*!< value to assign */
};
/** Column assignment node */
-struct col_assign_node_struct{
+struct col_assign_node_t{
que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
sym_node_t* col; /*!< column to set */
que_node_t* val; /*!< value to assign */
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
index 4b3b342a533..bcf73639228 100644
--- a/storage/innobase/include/pars0sym.h
+++ b/storage/innobase/include/pars0sym.h
@@ -119,9 +119,9 @@ sym_tab_add_bound_id(
sym_tab_t* sym_tab, /*!< in: symbol table */
const char* name); /*!< in: name of bound id */
-/** Index of sym_node_struct::field_nos corresponding to the clustered index */
+/** Index of sym_node_t::field_nos corresponding to the clustered index */
#define SYM_CLUST_FIELD_NO 0
-/** Index of sym_node_struct::field_nos corresponding to a secondary index */
+/** Index of sym_node_t::field_nos corresponding to a secondary index */
#define SYM_SEC_FIELD_NO 1
/** Types of a symbol table node */
@@ -143,7 +143,7 @@ enum sym_tab_entry {
};
/** Symbol table node */
-struct sym_node_struct{
+struct sym_node_t{
que_common_t common; /*!< node type:
QUE_NODE_SYMBOL */
/* NOTE: if the data field in 'common.val' is not NULL and the symbol
@@ -227,7 +227,7 @@ struct sym_node_struct{
};
/** Symbol table */
-struct sym_tab_struct{
+struct sym_tab_t{
que_t* query_graph;
/*!< query graph generated by the
parser */
diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h
index 13ae53f3fd6..47f4b432d20 100644
--- a/storage/innobase/include/pars0types.h
+++ b/storage/innobase/include/pars0types.h
@@ -26,24 +26,24 @@ Created 1/11/1998 Heikki Tuuri
#ifndef pars0types_h
#define pars0types_h
-typedef struct pars_info_struct pars_info_t;
-typedef struct pars_user_func_struct pars_user_func_t;
-typedef struct pars_bound_lit_struct pars_bound_lit_t;
-typedef struct pars_bound_id_struct pars_bound_id_t;
-typedef struct sym_node_struct sym_node_t;
-typedef struct sym_tab_struct sym_tab_t;
-typedef struct pars_res_word_struct pars_res_word_t;
-typedef struct func_node_struct func_node_t;
-typedef struct order_node_struct order_node_t;
-typedef struct proc_node_struct proc_node_t;
-typedef struct elsif_node_struct elsif_node_t;
-typedef struct if_node_struct if_node_t;
-typedef struct while_node_struct while_node_t;
-typedef struct for_node_struct for_node_t;
-typedef struct exit_node_struct exit_node_t;
-typedef struct return_node_struct return_node_t;
-typedef struct assign_node_struct assign_node_t;
-typedef struct col_assign_node_struct col_assign_node_t;
+struct pars_info_t;
+struct pars_user_func_t;
+struct pars_bound_lit_t;
+struct pars_bound_id_t;
+struct sym_node_t;
+struct sym_tab_t;
+struct pars_res_word_t;
+struct func_node_t;
+struct order_node_t;
+struct proc_node_t;
+struct elsif_node_t;
+struct if_node_t;
+struct while_node_t;
+struct for_node_t;
+struct exit_node_t;
+struct return_node_t;
+struct assign_node_t;
+struct col_assign_node_t;
typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index 531794ce688..ba8828623af 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,7 +42,7 @@ of SQL execution in the UNIV_SQL_DEBUG version */
extern ibool que_trace_on;
/** Mutex protecting the query threads. */
-extern mutex_t que_thr_mutex;
+extern ib_mutex_t que_thr_mutex;
/***********************************************************************//**
Creates a query graph fork node.
@@ -310,7 +310,7 @@ que_node_print_info(
Evaluate the given SQL
@return error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
que_eval_sql(
/*=========*/
pars_info_t* info, /*!< in: info struct, or NULL */
@@ -349,7 +349,7 @@ que_close(void);
/* Query graph query thread node: the fields are protected by the
trx_t::mutex with the exceptions named below */
-struct que_thr_struct{
+struct que_thr_t{
que_common_t common; /*!< type: QUE_NODE_THR */
ulint magic_n; /*!< magic number to catch memory
corruption */
@@ -374,7 +374,7 @@ struct que_thr_struct{
thus far */
ulint lock_state; /*!< lock state of thread (table or
row) */
- struct srv_slot_struct*
+ struct srv_slot_t*
slot; /* The thread slot in the wait
array in srv_sys_t */
/*------------------------------*/
@@ -398,7 +398,7 @@ struct que_thr_struct{
#define QUE_THR_MAGIC_FREED 123461526
/* Query graph fork node: its fields are protected by the query thread mutex */
-struct que_fork_struct{
+struct que_fork_t{
que_common_t common; /*!< type: QUE_NODE_FORK */
que_t* graph; /*!< query graph of this node */
ulint fork_type; /*!< fork type */
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
index b165b817d87..0f11cad301a 100644
--- a/storage/innobase/include/que0types.h
+++ b/storage/innobase/include/que0types.h
@@ -32,18 +32,15 @@ Created 5/27/1996 Heikki Tuuri
/* Pseudotype for all graph nodes */
typedef void que_node_t;
-typedef struct que_fork_struct que_fork_t;
-
/* Query graph root is a fork node */
-typedef que_fork_t que_t;
+typedef struct que_fork_t que_t;
-typedef struct que_thr_struct que_thr_t;
-typedef struct que_common_struct que_common_t;
+struct que_thr_t;
/* Common struct at the beginning of each query graph node; the name of this
substruct must be 'common' */
-struct que_common_struct{
+struct que_common_t{
ulint type; /*!< query node type */
que_node_t* parent; /*!< back pointer to parent node, or NULL */
que_node_t* brother;/* pointer to a possible brother node */
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
index 6ea57fffcd2..980faddf98e 100644
--- a/storage/innobase/include/read0read.h
+++ b/storage/innobase/include/read0read.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,12 +57,14 @@ read_view_purge_open(
mem_heap_t* heap); /*!< in: memory heap from which
allocated */
/*********************************************************************//**
-Remove read view from the trx_sys->view_list. */
-UNIV_INTERN
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
void
read_view_remove(
/*=============*/
- read_view_t* view); /*!< in: read view */
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex); /*!< in: true if caller owns the
+ trx_sys_t::mutex */
/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
@@ -73,13 +75,14 @@ read_view_close_for_mysql(
trx_t* trx); /*!< in: trx which has a read view */
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id);/*!< in: trx id */
+ trx_id_t trx_id) /*!< in: trx id */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Prints a read view to stderr. */
UNIV_INTERN
@@ -119,7 +122,7 @@ read_cursor_set_for_mysql(
/** Read view lists the trx ids of those transactions for which a consistent
read should not see the modifications to the database. */
-struct read_view_struct{
+struct read_view_t{
ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
undo_no_t undo_no;/*!< 0 or if type is
VIEW_HIGH_GRANULARITY
@@ -145,7 +148,7 @@ struct read_view_struct{
trx_id_t* trx_ids;/*!< Additional trx ids which the read should
not see: typically, these are the read-write
active transactions at the time when the read
- is serialized, except the reading transaction
+ is serialized, except the reading transaction
itself; the trx ids in this array are in a
descending order. These trx_ids should be
between the "low" and "high" water marks,
@@ -173,7 +176,7 @@ struct read_view_struct{
cursors. This struct holds both heap where consistent read view
is allocated and pointer to a read view. */
-struct cursor_view_struct{
+struct cursor_view_t{
mem_heap_t* heap;
/*!< Memory heap for the cursor view */
read_view_t* read_view;
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
index 436800e1585..82c1028f12e 100644
--- a/storage/innobase/include/read0read.ic
+++ b/storage/innobase/include/read0read.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,11 +23,64 @@ Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
+#include "trx0sys.h"
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates a read view object. */
+static
+bool
+read_view_validate(
+/*===============*/
+ const read_view_t* view) /*!< in: view to validate */
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ /* Check that the view->trx_ids array is in descending order. */
+ for (ulint i = 1; i < view->n_trx_ids; ++i) {
+
+ ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
+ }
+
+ return(true);
+}
+
+/** Functor to validate the view list. */
+struct ViewCheck {
+
+ ViewCheck() : m_prev_view(0) { }
+
+ void operator()(const read_view_t* view)
+ {
+ ut_a(m_prev_view == NULL
+ || m_prev_view->low_limit_no >= view->low_limit_no);
+
+ m_prev_view = view;
+ }
+
+ const read_view_t* m_prev_view;
+};
+
+/*********************************************************************//**
+Validates a read view list. */
+static
+bool
+read_view_list_validate(void)
+/*=========================*/
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
+
+ return(true);
+}
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
@@ -35,10 +88,10 @@ read_view_sees_trx_id(
{
if (trx_id < view->up_limit_id) {
- return(TRUE);
+ return(true);
} else if (trx_id >= view->low_limit_id) {
- return(FALSE);
+ return(false);
} else {
ulint lower = 0;
ulint upper = view->n_trx_ids - 1;
@@ -63,5 +116,33 @@ read_view_sees_trx_id(
} while (lower <= upper);
}
- return(TRUE);
+ return(true);
+}
+
+/*********************************************************************//**
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
+void
+read_view_remove(
+/*=============*/
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex) /*!< in: true if caller owns the
+ trx_sys_t::mutex */
+{
+ if (view != 0) {
+ if (!own_mutex) {
+ mutex_enter(&trx_sys->mutex);
+ }
+
+ ut_ad(read_view_validate(view));
+
+ UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
+
+ ut_ad(read_view_list_validate());
+
+ if (!own_mutex) {
+ mutex_exit(&trx_sys->mutex);
+ }
+ }
}
+
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 0b6aa132b88..969f4ebb637 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -26,7 +26,7 @@ Created 2/16/1997 Heikki Tuuri
#ifndef read0types_h
#define read0types_h
-typedef struct read_view_struct read_view_t;
-typedef struct cursor_view_struct cursor_view_t;
+struct read_view_t;
+struct cursor_view_t;
#endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index ed6486aa603..cb3c85ac2c8 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -156,21 +156,28 @@ respectively, when only the common first fields are compared, or until
the first externally stored field in rec */
UNIV_INTERN
int
-cmp_dtuple_rec_with_match(
-/*======================*/
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
+ ulint n_cmp, /*!< in: number of fields to compare */
+ ulint* matched_fields,
+ /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
+ ulint* matched_bytes)
+ /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns, contains the
value for current comparison */
+ __attribute__((nonnull));
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \
+ cmp_dtuple_rec_with_match_low( \
+ tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
/**************************************************************//**
Compares a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
@@ -196,7 +203,9 @@ cmp_dtuple_is_prefix_of_rec(
/*************************************************************//**
Compare two physical records that contain the same number of columns,
none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
UNIV_INTERN
int
cmp_rec_rec_simple(
@@ -206,8 +215,10 @@ cmp_rec_rec_simple(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
- ibool* null_eq);/*!< out: set to TRUE if
- found matching null values */
+ struct TABLE* table) /*!< in: MySQL table, for reporting
+ duplicate key value if applicable,
+ or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index c6c70bb5f09..2a84aee7a6f 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -66,6 +66,15 @@ The status is stored in the low-order bits. */
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
+/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_1BYTE_SQL_NULL_MASK 0x80UL
+/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
+
+/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
+significant bit denotes that the tail of a field is stored off-page. */
+#define REC_2BYTE_EXTERN_MASK 0x4000UL
+
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
@@ -88,7 +97,8 @@ const rec_t*
rec_get_next_ptr_const(
/*===================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
@@ -98,7 +108,8 @@ rec_t*
rec_get_next_ptr(
/*=============*/
rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the offset of the
next chained record on the same page.
@@ -108,7 +119,8 @@ ulint
rec_get_next_offs(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the next record offset field
of an old-style record. */
@@ -117,7 +129,8 @@ void
rec_set_next_offs_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the next record offset field
of a new-style record. */
@@ -126,7 +139,8 @@ void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of fields
in an old-style record.
@@ -135,7 +149,8 @@ UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of fields
in a record.
@@ -145,7 +160,8 @@ ulint
rec_get_n_fields(
/*=============*/
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -154,7 +170,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_old(
/*================*/
- const rec_t* rec); /*!< in: old-style physical record */
+ const rec_t* rec) /*!< in: old-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -162,7 +179,8 @@ void
rec_set_n_owned_old(
/*================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned); /*!< in: the number of owned */
+ ulint n_owned) /*!< in: the number of owned */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -171,7 +189,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
- const rec_t* rec); /*!< in: new-style physical record */
+ const rec_t* rec) /*!< in: new-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -180,7 +199,8 @@ rec_set_n_owned_new(
/*================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned);/*!< in: the number of owned */
+ ulint n_owned)/*!< in: the number of owned */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function is used to retrieve the info bits of
a record.
@@ -190,7 +210,8 @@ ulint
rec_get_info_bits(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -198,7 +219,8 @@ void
rec_set_info_bits_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -206,7 +228,8 @@ void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function retrieves the status bits of a new-style record.
@return status bits */
@@ -214,7 +237,8 @@ UNIV_INLINE
ulint
rec_get_status(
/*===========*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the status bits of a new-style record. */
@@ -223,7 +247,8 @@ void
rec_set_status(
/*===========*/
rec_t* rec, /*!< in/out: physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to retrieve the info and status
@@ -234,7 +259,8 @@ ulint
rec_get_info_and_status_bits(
/*=========================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
@@ -243,7 +269,8 @@ void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /*!< in/out: compact physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function tells if record is delete marked.
@@ -253,7 +280,8 @@ ulint
rec_get_deleted_flag(
/*=================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -261,7 +289,8 @@ void
rec_set_deleted_flag_old(
/*=====================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -270,7 +299,8 @@ rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
@@ -278,7 +308,8 @@ UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the order number
of an old-style record in the heap of the index page.
@@ -287,7 +318,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_old(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
@@ -296,7 +328,8 @@ void
rec_set_heap_no_old(
/*================*/
rec_t* rec, /*!< in: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
@@ -305,7 +338,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
@@ -314,7 +348,8 @@ void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /*!< in/out: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format.
@@ -323,7 +358,57 @@ UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+ rec_t* rec, /*!< in: physical record */
+ ibool flag) /*!< in: TRUE if 1byte form */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the start of the field, SQL null flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the start of the field, SQL null flag and extern
+storage flag ORed */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine how many of the first n columns in a compact
@@ -333,9 +418,10 @@ UNIV_INTERN
ulint
rec_get_n_extern_new(
/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n); /*!< in: number of columns to scan */
+ const rec_t* rec, /*!< in: compact physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************//**
The following function determines the offsets to each field
@@ -356,30 +442,13 @@ rec_get_offsets_func(
(ULINT_UNDEFINED if all fields) */
mem_heap_t** heap, /*!< in/out: memory heap */
const char* file, /*!< in: file name where called */
- ulint line); /*!< in: line number where called */
+ ulint line) /*!< in: line number where called */
+ __attribute__((nonnull(1,2,5,6),warn_unused_result));
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-
-/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
UNIV_INTERN
@@ -393,9 +462,10 @@ rec_get_offsets_reverse(
const dict_index_t* index, /*!< in: record descriptor */
ulint node_ptr,/*!< in: nonzero=node pointer,
0=leaf node */
- ulint* offsets);/*!< in/out: array consisting of
+ ulint* offsets)/*!< in/out: array consisting of
offsets[0] allocated elements */
-
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
/************************************************************//**
Validates offsets returned by rec_get_offsets().
@return TRUE if valid */
@@ -405,9 +475,9 @@ rec_offs_validate(
/*==============*/
const rec_t* rec, /*!< in: record or NULL */
const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets);/*!< in: array returned by
+ const ulint* offsets)/*!< in: array returned by
rec_get_offsets() */
-#ifdef UNIV_DEBUG
+ __attribute__((nonnull(3), warn_unused_result));
/************************************************************//**
Updates debug data in offsets, in order to avoid bogus
rec_offs_validate() failures. */
@@ -417,8 +487,9 @@ rec_offs_make_valid(
/*================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in: array returned by
+ ulint* offsets)/*!< in: array returned by
rec_get_offsets() */
+ __attribute__((nonnull));
#else
# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
#endif /* UNIV_DEBUG */
@@ -433,8 +504,9 @@ rec_get_nth_field_offs_old(
/*=======================*/
const rec_t* rec, /*!< in: record */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field_old(rec, n, len) \
((rec) + rec_get_nth_field_offs_old(rec, n, len))
/************************************************************//**
@@ -447,7 +519,8 @@ ulint
rec_get_nth_field_size(
/*===================*/
const rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
+ ulint n) /*!< in: index of the field */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
@@ -458,8 +531,9 @@ rec_get_nth_field_offs(
/*===================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field(rec, offsets, n, len) \
((rec) + rec_get_nth_field_offs(offsets, n, len))
/******************************************************//**
@@ -470,7 +544,8 @@ UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing
externally stored columns.
@@ -479,8 +554,8 @@ UNIV_INLINE
ulint
rec_offs_any_extern(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -490,8 +565,7 @@ rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
- __attribute__((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
@@ -500,7 +574,8 @@ ulint
rec_offs_nth_extern(
/*================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the SQL NULL bit is set in nth field of rec.
@return nonzero if SQL NULL */
@@ -509,7 +584,8 @@ ulint
rec_offs_nth_sql_null(
/*==================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Gets the physical size of a field.
@return length of field */
@@ -518,7 +594,8 @@ ulint
rec_offs_nth_size(
/*==============*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns the number of extern bits set in a record.
@@ -527,7 +604,8 @@ UNIV_INLINE
ulint
rec_offs_n_extern(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
@@ -542,11 +620,12 @@ rec_set_nth_field(
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index number of the field */
const void* data, /*!< in: pointer to the data if not SQL null */
- ulint len); /*!< in: length of the data or UNIV_SQL_NULL.
+ ulint len) /*!< in: length of the data or UNIV_SQL_NULL.
If not SQL null, must have the same
length as the previous value.
If SQL null, previous value must be
SQL null. */
+ __attribute__((nonnull(1,2)));
/**********************************************************//**
The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
@@ -557,7 +636,8 @@ UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the number of allocated elements
for an array of offsets.
@@ -566,7 +646,8 @@ UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- const ulint* offsets);/*!< in: array for rec_get_offsets() */
+ const ulint* offsets)/*!< in: array for rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -576,7 +657,8 @@ rec_offs_set_n_alloc(
/*=================*/
ulint* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
- ulint n_alloc); /*!< in: number of elements */
+ ulint n_alloc) /*!< in: number of elements */
+ __attribute__((nonnull));
#define rec_offs_init(offsets) \
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
/**********************************************************//**
@@ -586,7 +668,8 @@ UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
@@ -597,7 +680,8 @@ UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
@@ -607,7 +691,8 @@ UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of a physical record.
@return size */
@@ -615,7 +700,8 @@ UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the start of the record.
@@ -625,7 +711,8 @@ byte*
rec_get_start(
/*==========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns a pointer to the end of the record.
@return pointer to end */
@@ -634,7 +721,8 @@ byte*
rec_get_end(
/*========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#else /* UNIV_DEBUG */
# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
@@ -648,8 +736,48 @@ rec_copy(
/*=====*/
void* buf, /*!< in: buffer */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull));
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+ __attribute__((nonnull));
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
+
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
@@ -665,7 +793,8 @@ rec_copy_prefix_to_buf(
byte** buf, /*!< in/out: memory buffer
for the copied prefix,
or NULL */
- ulint* buf_size); /*!< in/out: buffer size */
+ ulint* buf_size) /*!< in/out: buffer size */
+ __attribute__((nonnull));
/************************************************************//**
Folds a prefix of a physical record to a ulint.
@return the folded value */
@@ -681,24 +810,9 @@ rec_fold(
ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
index_id_t tree_id) /*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields);/*!< in: number of data fields */
-/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@return pointer to the origin of physical record */
@@ -710,8 +824,9 @@ rec_convert_dtuple_to_rec(
physical record */
const dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of
+ ulint n_ext) /*!< in: number of
externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
data size and number of fields.
@@ -723,7 +838,7 @@ rec_get_converted_extra_size(
ulint data_size, /*!< in: data size */
ulint n_fields, /*!< in: number of fields */
ulint n_ext) /*!< in: number of externally stored columns */
- __attribute__((const));
+ __attribute__((const));
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
@@ -731,13 +846,11 @@ UNIV_INTERN
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
+ const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull(1,2)));
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
@@ -752,7 +865,8 @@ rec_get_converted_size_comp(
ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((nonnull(1,3)));
/**********************************************************//**
The following function returns the size of a data tuple when converted to
a physical record.
@@ -763,7 +877,8 @@ rec_get_converted_size(
/*===================*/
dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of externally stored columns */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((warn_unused_result, nonnull));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple.
@@ -777,7 +892,8 @@ rec_copy_prefix_to_dtuple(
const dict_index_t* index, /*!< in: record descriptor */
ulint n_fields, /*!< in: number of fields
to copy */
- mem_heap_t* heap); /*!< in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of a physical record.
@@ -787,7 +903,8 @@ ibool
rec_validate(
/*=========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints an old-style physical record. */
UNIV_INTERN
@@ -795,7 +912,8 @@ void
rec_print_old(
/*==========*/
FILE* file, /*!< in: file where to print */
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
@@ -806,7 +924,8 @@ rec_print_comp(
/*===========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -815,7 +934,8 @@ rec_print_new(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -824,7 +944,21 @@ rec_print(
/*======*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull));
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
#endif /* UNIV_HOTBACKUP */
/* Maximum lengths for the data in a physical record if the offsets
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index 6950263fe81..18a7deb9d26 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -118,17 +118,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
-/* The following masks are used to filter the SQL null bit from
-one-byte and two-byte offsets */
-
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/* In a 2-byte offset the second most significant bit denotes
-a field stored to another page: */
-
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
@@ -883,6 +872,20 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+{
+ return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
+}
+
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
@@ -1084,7 +1087,6 @@ rec_offs_any_extern(
return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
}
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -1120,7 +1122,6 @@ rec_offs_any_null_extern(
return(NULL);
}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index 2f1ead43c07..f8133f77466 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,6 +34,15 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
+/* Maximum number of user defined fields/columns. The reserved columns
+are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR.
+We need "* 2" because mlog_parse_index() creates a dummy table object
+possibly, with some of the system columns in it, and then adds the 3
+system columns (again) using dict_table_add_system_columns(). The problem
+is that mlog_parse_index() cannot recognize the system columns by
+just having n_fields, n_uniq and the lengths of the columns. */
+#define REC_MAX_N_USER_FIELDS (REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2)
+
/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed field length (or indexed prefix length) for indexes on tables of
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h
index 60aaf16c09a..a098e2f9b29 100644
--- a/storage/innobase/include/row0ext.h
+++ b/storage/innobase/include/row0ext.h
@@ -84,7 +84,7 @@ row_ext_lookup(
DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns */
-struct row_ext_struct{
+struct row_ext_t{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache */
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index cc5efea026f..4a486450efc 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,16 +56,16 @@ typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
#define FTS_PLL_MERGE 1
/** Sort information passed to each individual parallel sort thread */
-typedef struct fts_psort_struct fts_psort_t;
+struct fts_psort_t;
/** Common info passed to each parallel sort thread */
-struct fts_psort_common_struct {
- struct TABLE* table; /*!< MySQL table */
+struct fts_psort_common_t {
+ row_merge_dup_t* dup; /*!< descriptor of FTS index */
dict_table_t* new_table; /*!< source table */
trx_t* trx; /*!< transaction */
- dict_index_t* sort_index; /*!< FTS index */
fts_psort_t* all_info; /*!< all parallel sort info */
os_event_t sort_event; /*!< sort event */
+ os_event_t merge_event; /*!< merge event */
ibool opt_doc_id_size;/*!< whether to use 4 bytes
instead of 8 bytes integer to
store Doc ID during sort, if
@@ -73,9 +73,7 @@ struct fts_psort_common_struct {
to use 8 bytes value */
};
-typedef struct fts_psort_common_struct fts_psort_common_t;
-
-struct fts_psort_struct {
+struct fts_psort_t {
ulint psort_id; /*!< Parallel sort ID */
row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
/*!< sort buffer */
@@ -89,6 +87,7 @@ struct fts_psort_struct {
ulint state; /*!< child thread state */
fts_doc_list_t fts_doc_list; /*!< doc list to process */
fts_psort_common_t* psort_common; /*!< ptr to all psort info */
+ os_thread_t thread_hdl; /*!< thread handler */
};
/** Structure stores information from string tokenization operation */
@@ -126,6 +125,7 @@ typedef struct fts_psort_insert fts_psort_insert_t;
/** status bit used for communication between parent and child thread */
#define FTS_PARENT_COMPLETE 1
#define FTS_CHILD_COMPLETE 1
+#define FTS_CHILD_EXITING 2
/** Print some debug information */
#define FTSORT_PRINT
@@ -171,18 +171,19 @@ ibool
row_fts_psort_info_init(
/*====================*/
trx_t* trx, /*!< in: transaction */
- struct TABLE* table, /*!< in: MySQL table object */
+ row_merge_dup_t* dup, /*!< in,own: descriptor of
+ FTS index being created */
const dict_table_t* new_table,/*!< in: table where indexes are
created */
- dict_index_t* index, /*!< in: FTS index to be created */
ibool opt_doc_id_size,
/*!< in: whether to use 4 bytes
instead of 8 bytes integer to
store Doc ID during sort */
fts_psort_t** psort, /*!< out: parallel sort info to be
instantiated */
- fts_psort_t** merge); /*!< out: parallel merge info
+ fts_psort_t** merge) /*!< out: parallel merge info
to be instantiated */
+ __attribute__((nonnull));
/********************************************************************//**
Clean up and deallocate FTS parallel sort structures, and close
temparary merge sort files */
@@ -231,19 +232,6 @@ row_fts_start_parallel_merge(
/*=========================*/
fts_psort_t* merge_info); /*!< in: parallel sort info */
/********************************************************************//**
-Insert processed FTS data to the auxillary tables.
-@return DB_SUCCESS if insertion runs fine */
-UNIV_INTERN
-ulint
-row_merge_write_fts_word(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- que_t** ins_graph, /*!< in: Insert query graphs */
- fts_tokenizer_word_t*word, /*!< in: sorted and tokenized
- word */
- fts_table_t* fts_table, /*!< in: fts aux table instance */
- CHARSET_INFO* charset); /*!< in: charset */
-/********************************************************************//**
Read sorted FTS data files and insert data tuples to auxillary tables.
@return DB_SUCCESS or error number */
UNIV_INTERN
@@ -275,13 +263,13 @@ Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
row_fts_merge_insert(
/*=================*/
dict_index_t* index, /*!< in: index */
dict_table_t* table, /*!< in: new table */
fts_psort_t* psort_info, /*!< parallel sort info */
- ulint id); /* !< in: which auxiliary table's data
+ ulint id) /* !< in: which auxiliary table's data
to insert to */
-
+ __attribute__((nonnull));
#endif /* row0ftsort_h */
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
new file mode 100644
index 00000000000..aa46fdb7c27
--- /dev/null
+++ b/storage/innobase/include/row0import.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.h
+Header file for import tablespace functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0import_h
+#define row0import_h
+
+#include "univ.i"
+#include "db0err.h"
+#include "dict0types.h"
+
+// Forward declarations
+struct trx_t;
+struct dict_table_t;
+struct row_prebuilt_t;
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct
+ in MySQL */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ table_id_t table_id, /*!< in: Table for which we want
+ to set the root table->flags2 */
+ bool discarded, /*!< in: set MIX_LEN column bit
+ to discarded, if true */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the (space, root page) of a table's indexes from the values
+in the data dictionary.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_import_update_index_root(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ const dict_table_t* table, /*!< in: Table for which we want
+ to set the root page_no */
+ bool reset, /*!< in: if true then set to
+ FIL_NUL */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#endif /* row0import_h */
diff --git a/storage/innobase/include/row0import.ic b/storage/innobase/include/row0import.ic
new file mode 100644
index 00000000000..c5bbab49f6f
--- /dev/null
+++ b/storage/innobase/include/row0import.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.ic
+
+Import tablespace inline functions.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index 54ad7241a4f..2a892d2f5df 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,7 +40,7 @@ the caller must have a shared latch on dict_foreign_key_check_lock.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
DB_ROW_IS_REFERENCED */
UNIV_INTERN
-ulint
+dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
ibool check_ref,/*!< in: TRUE If we want to check that
@@ -52,7 +52,8 @@ row_ins_check_foreign_constraint(
dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
table, else the referenced table */
dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Creates an insert node struct.
@return own: insert node struct */
@@ -74,21 +75,110 @@ ins_node_set_new_row(
ins_node_t* node, /*!< in: insert node */
dtuple_t* row); /*!< in: new row (or first row) for the node */
/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, the other
+record is necessarily marked deleted by a committed transaction, or a
+unique key violation error occurs. The delete marked record is then
+updated to an existing record, and we must write an undo log record on
+the delete marked record.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint n_uniq, /*!< in: 0 or index->n_uniq */
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints
- (foreign=FALSE only during CREATE INDEX) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread or NULL */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: secondary index */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
+ row_log_table_apply(), or 0 */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+ const dtuple_t* entry, /*!< in/out: index entry to insert */
+ const big_rec_t* big_rec,/*!< in: externally stored fields */
+ ulint* offsets,/*!< in/out: rec offsets */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ dict_index_t* index, /*!< in: index */
+ const char* file, /*!< in: file name of caller */
+#ifndef DBUG_OFF
+ const void* thd, /*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+ ulint line) /*!< in: line number of caller */
+ __attribute__((nonnull(1,2,3,4,5,6), warn_unused_result));
+#ifdef DBUG_OFF
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
+#else /* DBUG_OFF */
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
+#endif /* DBUG_OFF */
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+ dict_index_t* index, /*!< in: secondary index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
SQL execution graphs.
@@ -98,17 +188,10 @@ que_thr_t*
row_ins_step(
/*=========*/
que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-UNIV_INTERN
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node); /*!< in: row insert node */
/* Insert node structure */
-struct ins_node_struct{
+struct ins_node_t{
que_common_t common; /*!< node type: QUE_NODE_INSERT */
ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
dtuple_t* row; /*!< row to insert */
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
new file mode 100644
index 00000000000..984d907d390
--- /dev/null
+++ b/storage/innobase/include/row0log.h
@@ -0,0 +1,241 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.h
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#ifndef row0log_h
+#define row0log_h
+
+#include "univ.i"
+#include "mtr0types.h"
+#include "row0types.h"
+#include "rem0types.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@retval true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ dict_table_t* table, /*!< in/out: new table being rebuilt,
+ or NULL when creating a secondary index */
+ bool same_pk,/*!< in: whether the definition of the
+ PRIMARY KEY has remained the same */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map)/*!< in: mapping of old column
+ numbers to new ones, or NULL if !table */
+ __attribute__((nonnull(1), warn_unused_result));
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+ row_log_t*& log) /*!< in,own: row log */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*==============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+ const dict_index_t* index) /*!< in: clustered index of a table
+ that is being rebuilt online */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
+ it was deleted */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Logs an update operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ before the update */
+ UNIV_COLD __attribute__((nonnull(1,2,3)));
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index),
+ or NULL */
+ mem_heap_t** heap) /*!< in/out: memory heap where allocated */
+ UNIV_COLD __attribute__((nonnull(1,2,4), warn_unused_result));
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+ dict_index_t* index, /*!< in/out: clustered index */
+ trx_id_t trx_id) /*!< in: transaction being rolled back */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+ const dict_index_t* index, /*!< in: clustered index */
+ trx_id_t trx_id) /*!< in: transaction id */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+ que_thr_t* thr, /*!< in: query graph */
+ dict_table_t* old_table,
+ /*!< in: old table */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+ dict_index_t* index) /*!< in: index, must be locked */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Merge the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: secondary index */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#endif /* row0log.h */
diff --git a/storage/innobase/include/row0log.ic b/storage/innobase/include/row0log.ic
new file mode 100644
index 00000000000..b0f37dbd8e7
--- /dev/null
+++ b/storage/innobase/include/row0log.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.ic
+Modification log for online index creation and online table rebuild
+
+Created 2012-10-18 Marko Makela
+*******************************************************/
+
+#include "dict0dict.h"
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*===============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(!dict_index_is_clust(index));
+ dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+ row_log_free(index->online_log);
+}
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ /* This is a normal index. Do not log anything.
+ The caller must perform the operation on the
+ index tree directly. */
+ return(false);
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created online. Log the
+ operation. */
+ row_log_online_op(index, tuple, trx_id);
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index was created online, but the operation was
+ aborted. Do not log the operation and tell the caller
+ to skip the operation. */
+ break;
+ }
+
+ return(true);
+}
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index c4e2f5ddf41..f464e46ae5b 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,15 +40,17 @@ Created 13/06/2005 Jan Lindstrom
#include "lock0types.h"
#include "srv0srv.h"
+// Forward declaration
+struct ib_sequence_t;
+
/** @brief Block size for I/O operations in merge sort.
The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
rounded to a power of 2.
When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
-ut_ad(data_size < sizeof(row_merge_block_t)). */
-typedef byte row_merge_block_t;
+can be set as small as UNIV_PAGE_SIZE / 2. */
+typedef byte row_merge_block_t;
/** @brief Secondary buffer for I/O operations of merge records.
@@ -64,114 +66,146 @@ The format is the same as a record in ROW_FORMAT=COMPACT with the
exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte mrec_t;
+/** Merge record in row_merge_buf_t */
+struct mtuple_t {
+ dfield_t* fields; /*!< data fields */
+};
+
/** Buffer for sorting in main memory. */
-struct row_merge_buf_struct {
+struct row_merge_buf_t {
mem_heap_t* heap; /*!< memory heap where allocated */
dict_index_t* index; /*!< the index the tuples belong to */
ulint total_size; /*!< total amount of data bytes */
ulint n_tuples; /*!< number of data tuples */
ulint max_tuples; /*!< maximum number of data tuples */
- const dfield_t**tuples; /*!< array of pointers to
- arrays of fields that form
- the data tuples */
- const dfield_t**tmp_tuples; /*!< temporary copy of tuples,
+ mtuple_t* tuples; /*!< array of data tuples */
+ mtuple_t* tmp_tuples; /*!< temporary copy of tuples,
for sorting */
};
-/** Buffer for sorting in main memory. */
-typedef struct row_merge_buf_struct row_merge_buf_t;
-
/** Information about temporary files used in merge sort */
-struct merge_file_struct {
+struct merge_file_t {
int fd; /*!< file descriptor */
ulint offset; /*!< file offset (end of file) */
ib_uint64_t n_rec; /*!< number of records in the file */
};
-/** Information about temporary files used in merge sort */
-typedef struct merge_file_struct merge_file_t;
-
/** Index field definition */
-struct merge_index_field_struct {
+struct index_field_t {
+ ulint col_no; /*!< column offset */
ulint prefix_len; /*!< column prefix length, or 0
if indexing the whole column */
- const char* field_name; /*!< field name */
};
-/** Index field definition */
-typedef struct merge_index_field_struct merge_index_field_t;
-
/** Definition of an index being created */
-struct merge_index_def_struct {
- const char* name; /*!< index name */
- ulint ind_type; /*!< 0, DICT_UNIQUE,
- or DICT_CLUSTERED */
- ulint n_fields; /*!< number of fields
- in index */
- merge_index_field_t* fields; /*!< field definitions */
+struct index_def_t {
+ const char* name; /*!< index name */
+ ulint ind_type; /*!< 0, DICT_UNIQUE,
+ or DICT_CLUSTERED */
+ ulint key_number; /*!< MySQL key number,
+ or ULINT_UNDEFINED if none */
+ ulint n_fields; /*!< number of fields in index */
+ index_field_t* fields; /*!< field definitions */
};
-/** Definition of an index being created */
-typedef struct merge_index_def_struct merge_index_def_t;
-
/** Structure for reporting duplicate records. */
-struct row_merge_dup_struct {
- const dict_index_t* index; /*!< index being sorted */
- struct TABLE* table; /*!< MySQL table object */
- ulint n_dup; /*!< number of duplicates */
+struct row_merge_dup_t {
+ dict_index_t* index; /*!< index being sorted */
+ struct TABLE* table; /*!< MySQL table object */
+ const ulint* col_map;/*!< mapping of column numbers
+ in table to the rebuilt table
+ (index->table), or NULL if not
+ rebuilding table */
+ ulint n_dup; /*!< number of duplicates */
};
-/** Structure for reporting duplicate records. */
-typedef struct row_merge_dup_struct row_merge_dup_t;
-
+/*************************************************************//**
+Report a duplicate key. */
+UNIV_INTERN
+void
+row_merge_dup_report(
+/*=================*/
+ row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
+ const dfield_t* entry) /*!< in: duplicate index entry */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode); /*!< in: LOCK_X or LOCK_S */
+ enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-Drop an index from the InnoDB system tables. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed. */
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
-row_merge_drop_index(
-/*=================*/
- dict_index_t* index, /*!< in: index to be removed */
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction handle */
+row_merge_drop_indexes_dict(
+/*========================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ table_id_t table_id)/*!< in: table identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
-Drop those indexes which were created before an error occurred when
-building an index. The data dictionary must have been locked
-exclusively by the caller, because the transaction will not be
-committed. */
+Drop those indexes which were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table containing the indexes */
- dict_index_t** index, /*!< in: indexes to drop */
- ulint num_created); /*!< in: number of elements in
- index[] */
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in/out: table containing the indexes */
+ ibool locked) /*!< in: TRUE=table locked,
+ FALSE=may need to do a lazy drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
UNIV_INTERN
void
row_merge_drop_temp_indexes(void);
/*=============================*/
+
+/*********************************************************************//**
+Creates temporary merge files, and if UNIV_PFS_IO defined, register
+the file descriptor with Performance Schema.
+@return File descriptor */
+UNIV_INTERN
+int
+row_merge_file_create_low(void)
+/*===========================*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Destroy a merge file. And de-register the file from Performance Schema
+if UNIV_PFS_IO is defined. */
+UNIV_INTERN
+void
+row_merge_file_destroy_low(
+/*=======================*/
+ int fd); /*!< in: merge file descriptor */
+
+/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace. The caller is responsible for freeing the
+memory allocated for the return value.
+@return new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+ dict_table_t* table, /*!< in: table to be renamed */
+ const char* new_name); /*!< in: new name */
/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_rename_tables(
/*====================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
@@ -179,32 +213,35 @@ row_merge_rename_tables(
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
+
/*********************************************************************//**
-Create a temporary table for creating a primary key, using the definition
-of an existing table.
-@return table, or NULL on error */
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-dict_table_t*
-row_merge_create_temporary_table(
-/*=============================*/
- const char* table_name, /*!< in: new table name */
- const merge_index_def_t*index_def, /*!< in: the index definition
- of the primary key */
- const dict_table_t* table, /*!< in: old table definition */
- trx_t* trx); /*!< in/out: transaction
- (sets error_state) */
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
-Rename the temporary indexes in the dictionary to permanent ones. The
-data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed.
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
-row_merge_rename_indexes(
-/*=====================*/
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table); /*!< in/out: table with new indexes */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
Create the index and load in to the dictionary.
@return index, or NULL on error */
@@ -214,7 +251,7 @@ row_merge_create_index(
/*===================*/
trx_t* trx, /*!< in/out: trx (sets error_state) */
dict_table_t* table, /*!< in: the index is on this table */
- const merge_index_def_t*index_def);
+ const index_def_t* index_def);
/*!< in: the index definition */
/*********************************************************************//**
Check if a transaction can use an index.
@@ -226,22 +263,25 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index); /*!< in: index to check */
/*********************************************************************//**
-If there are views that refer to the old table name then we "attach" to
-the new instance of the table else we drop it immediately.
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table instance to drop */
+ dict_table_t* table) /*!< in: table instance to drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_build_indexes(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -250,11 +290,24 @@ row_merge_build_indexes(
dict_table_t* new_table, /*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** indexes, /*!< in: indexes to be created */
+ const ulint* key_numbers, /*!< in: MySQL key numbers */
ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table); /*!< in/out: MySQL table, for
+ struct TABLE* table, /*!< in/out: MySQL table, for
reporting erroneous key value
if applicable */
+ const dtuple_t* add_cols, /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map, /*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc, /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence) /*!< in/out: autoinc sequence */
+ __attribute__((nonnull(1,2,3,5,6,8), warn_unused_result));
/********************************************************************//**
Write a buffer to a block. */
UNIV_INTERN
@@ -263,15 +316,18 @@ row_merge_buf_write(
/*================*/
const row_merge_buf_t* buf, /*!< in: sorted buffer */
const merge_file_t* of, /*!< in: output file */
- row_merge_block_t* block); /*!< out: buffer for writing to file */
+ row_merge_block_t* block) /*!< out: buffer for writing to file */
+ __attribute__((nonnull));
/********************************************************************//**
Sort a buffer. */
UNIV_INTERN
void
row_merge_buf_sort(
/*===============*/
- row_merge_buf_t* buf, /*!< in/out: sort buffer */
- row_merge_dup_t* dup); /*!< in/out: for reporting duplicates */
+ row_merge_buf_t* buf, /*!< in/out: sort buffer */
+ row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
+ __attribute__((nonnull(1)));
/********************************************************************//**
Write a merge block to the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -290,30 +346,32 @@ UNIV_INTERN
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
- row_merge_buf_t* buf); /*!< in,own: sort buffer */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer */
+ __attribute__((warn_unused_result, nonnull));
/*********************************************************************//**
-Create a merge file. */
+Create a merge file.
+@return file descriptor, or -1 on failure */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
- merge_file_t* merge_file); /*!< out: merge file structure */
+ merge_file_t* merge_file) /*!< out: merge file structure */
+ __attribute__((nonnull));
/*********************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_sort(
/*===========*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table); /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ int* tmpfd) /*!< in/out: temporary file handle */
+ __attribute__((nonnull));
/*********************************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
@@ -321,37 +379,24 @@ UNIV_INTERN
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
- dict_index_t* index); /*!< in: secondary index */
+ dict_index_t* index) /*!< in: secondary index */
+ __attribute__((warn_unused_result, nonnull, malloc));
/*********************************************************************//**
Deallocate a sort buffer. */
UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
- row_merge_buf_t* buf); /*!< in,own: sort buffer, to be freed */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
+ __attribute__((nonnull));
/*********************************************************************//**
Destroy a merge file. */
UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
- merge_file_t* merge_file); /*!< out: merge file structure */
-/*********************************************************************//**
-Compare two merge records.
-@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
-UNIV_INTERN
-int
-row_merge_cmp(
-/*==========*/
- const mrec_t* mrec1, /*!< in: first merge
- record to be compared */
- const mrec_t* mrec2, /*!< in: second merge
- record to be compared */
- const ulint* offsets1, /*!< in: first record offsets */
- const ulint* offsets2, /*!< in: second record offsets */
- const dict_index_t* index, /*!< in: index */
- ibool* null_eq); /*!< out: set to TRUE if
- found matching null values */
+ merge_file_t* merge_file) /*!< in/out: merge file structure */
+ __attribute__((nonnull));
/********************************************************************//**
Read a merge block from the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -367,7 +412,7 @@ row_merge_read(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN __attribute__((nonnull))
+UNIV_INTERN
const byte*
row_merge_read_rec(
/*===============*/
@@ -380,5 +425,6 @@ row_merge_read_rec(
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
(non-NULL on I/O error) */
- ulint* offsets);/*!< out: offsets of mrec */
+ ulint* offsets)/*!< out: offsets of mrec */
+ __attribute__((nonnull, warn_unused_result));
#endif /* row0merge.h */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 17a29e38ec7..1e0f3b30f8c 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,9 +36,12 @@ Created 9/17/2000 Heikki Tuuri
#include "btr0pcur.h"
#include "trx0types.h"
+// Forward declaration
+struct SysIndexCallback;
+
extern ibool row_rollback_on_timeout;
-typedef struct row_prebuilt_struct row_prebuilt_t;
+struct row_prebuilt_t;
/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
@@ -152,18 +155,19 @@ row_mysql_store_col_in_innobase_format(
ulint comp); /*!< in: nonzero=compact format */
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
+@return true if it was a lock wait and we should continue running the
query thread */
UNIV_INTERN
-ibool
+bool
row_mysql_handle_errors(
/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
+ dberr_t* new_err,/*!< out: possible new error encountered in
rollback, or the old error which was
during the function entry */
trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept);/*!< in: savepoint */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
+ __attribute__((nonnull(1,2)));
/********************************************************************//**
Create a prebuilt struct for a MySQL table handle.
@return own: a prebuilt struct */
@@ -200,16 +204,17 @@ It is not compatible with another AUTO_INC or exclusive lock on the
table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
table handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets a table lock on the table mentioned in prebuilt.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_for_mysql(
/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
@@ -218,19 +223,20 @@ row_lock_table_for_mysql(
if prebuilt->table should be
locked as
prebuilt->select_lock_type */
- ulint mode); /*!< in: lock mode of table
+ ulint mode) /*!< in: lock mode of table
(ignored if table==NULL) */
-
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Does an insert for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_insert_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Builds a dummy query graph used in selects. */
UNIV_INTERN
@@ -263,13 +269,14 @@ row_table_got_default_clust_index(
Does an update or delete of a row for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_update_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: the row to be updated, in
the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
@@ -278,19 +285,31 @@ initialized prebuilt->new_rec_locks to store the information which new
record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set.
-@return error code or DB_SUCCESS */
+releases the latest clustered index record lock we set. */
UNIV_INTERN
-int
+void
row_unlock_for_mysql(
/*=================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
handle */
- ibool has_latches_on_recs);/*!< in: TRUE if called
+ ibool has_latches_on_recs)/*!< in: TRUE if called
so that we have the latches on
the records under pcur and
clust_pcur, and we do not need
to reposition the cursors. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+ const char* name) __attribute__((warn_unused_result));
+ /*!< in: table name in the form
+ 'database/tablename' */
+
/*********************************************************************//**
Creates an query graph node of 'update' type to be used in the MySQL
interface.
@@ -305,13 +324,14 @@ row_create_update_node_for_mysql(
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_update_cascade_for_mysql(
/*=========================*/
que_thr_t* thr, /*!< in: query thread */
upd_node_t* node, /*!< in: update node used in the cascade
or set null operation */
- dict_table_t* table); /*!< in: table where we do the operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
@@ -355,33 +375,38 @@ Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_table_for_mysql(
/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true, commit the transaction */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Does an index creation operation for MySQL. TODO: currently failure
to create an index results in dropping the whole table! This is no problem
currently as all indexes must be created at the same time as the table.
@return error number or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_index_for_mysql(
/*=======================*/
dict_index_t* index, /*!< in, own: index definition
(will be freed) */
trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths); /*!< in: if not NULL, must contain
+ const ulint* field_lengths) /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
then checked for not being too
large. */
+ __attribute__((nonnull(1,2), warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -391,7 +416,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_table_add_foreign_constraints(
/*==============================*/
trx_t* trx, /*!< in: transaction */
@@ -404,10 +429,10 @@ row_table_add_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
The master thread in srv0srv.cc calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
@@ -426,14 +451,28 @@ ulint
row_get_background_drop_list_len_low(void);
/*======================================*/
/*********************************************************************//**
+Sets an exclusive lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_truncate_table_for_mysql(
/*=========================*/
dict_table_t* table, /*!< in: table handle */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a table for MySQL. If the name of the dropped table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
@@ -443,12 +482,16 @@ by the transaction, the transaction will be committed. Otherwise, the
data dictionary will remain locked.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_table_for_mysql(
/*=====================*/
const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool drop_db);/*!< in: TRUE=dropping whole database */
+ trx_t* trx, /*!< in: dictionary transaction handle */
+ bool drop_db,/*!< in: true=dropping whole database */
+ bool nonatomic = true)
+ /*!< in: whether it is permitted
+ to release and reacquire dict_operation_lock */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all temporary tables during crash recovery. */
UNIV_INTERN
@@ -462,66 +505,70 @@ means that this function deletes the .ibd file and assigns a new table id for
the table. Also the flag table->ibd_file_missing is set TRUE.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_discard_tablespace_for_mysql(
/*=============================*/
const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_import_tablespace_for_mysql(
/*============================*/
- const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a database for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_database_for_mysql(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull));
/*********************************************************************//**
Renames a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit); /*!< in: if TRUE then commit trx */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: whether to commit trx */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return DB_SUCCESS if ok */
+@return true if ok */
UNIV_INTERN
-ulint
+bool
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
- ulint* n_rows); /*!< out: number of entries
+ ulint* n_rows) /*!< out: number of entries
seen in the consistent read */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
+@return true if monitor table */
UNIV_INTERN
-ibool
+bool
row_is_magic_monitor_table(
/*=======================*/
- const char* table_name); /*!< in: name of the table, in the
+ const char* table_name) /*!< in: name of the table, in the
form database/table_name */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Initialize this module */
UNIV_INTERN
@@ -536,13 +583,24 @@ void
row_mysql_close(void);
/*=================*/
+/*********************************************************************//**
+Reassigns the table identifier of a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t* new_id) /*!< out: new table id */
+ __attribute__((nonnull, warn_unused_result));
+
/* A struct describing a place for an individual column in the MySQL
row format which is presented to the table handler in ha_innobase.
This template struct is used to speed up row transformations between
Innobase and MySQL. */
-typedef struct mysql_row_templ_struct mysql_row_templ_t;
-struct mysql_row_templ_struct {
+struct mysql_row_templ_t {
ulint col_no; /*!< column number of the column */
ulint rec_field_no; /*!< field number of the column in an
Innobase record in the current index;
@@ -597,7 +655,7 @@ struct mysql_row_templ_struct {
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
-struct row_prebuilt_struct {
+struct row_prebuilt_t {
ulint magic_n; /*!< this magic number is set to
ROW_PREBUILT_ALLOCATED when created,
or ROW_PREBUILT_FREED when the
@@ -682,8 +740,11 @@ struct row_prebuilt_struct {
columns in the table */
upd_node_t* upd_node; /*!< Innobase SQL update node used
to perform updates and deletes */
+ trx_id_t trx_id; /*!< The table->def_trx_id when
+ ins_graph was built */
que_fork_t* ins_graph; /*!< Innobase SQL query graph used
- in inserts */
+ in inserts. Will be rebuilt on
+ trx_id or n_indexes mismatch. */
que_fork_t* upd_graph; /*!< Innobase SQL query graph used
in updates or deletes */
btr_pcur_t pcur; /*!< persistent cursor used in selects
@@ -780,7 +841,7 @@ struct row_prebuilt_struct {
to this heap */
mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
- fts_result_t* result; /* The result of an FTS query */
+ bool in_fts_query; /*!< Whether we are in a FTS query */
/*----------------------*/
ulonglong autoinc_last_value;
/*!< last value of AUTO-INC interval */
@@ -791,7 +852,7 @@ struct row_prebuilt_struct {
ulonglong autoinc_offset; /*!< The offset passed to
get_auto_increment() by MySQL. Required
to calculate the next value */
- ulint autoinc_error; /*!< The actual error code encountered
+ dberr_t autoinc_error; /*!< The actual error code encountered
while trying to init or read the
autoinc value from the table. We
store it here so that we can return
@@ -806,6 +867,20 @@ struct row_prebuilt_struct {
/*----------------------*/
ulint magic_n2; /*!< this should be the same as
magic_n */
+ /*----------------------*/
+ unsigned innodb_api:1; /*!< whether this is a InnoDB API
+ query */
+ const rec_t* innodb_api_rec; /*!< InnoDB API search result */
+};
+
+/** Callback for row_mysql_sys_index_iterate() */
+struct SysIndexCallback {
+ virtual ~SysIndexCallback() { }
+
+ /** Callback method
+ @param mtr - current mini transaction
+ @param pcur - persistent cursor. */
+ virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
};
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
@@ -829,4 +904,4 @@ struct row_prebuilt_struct {
#include "row0mysql.ic"
#endif
-#endif
+#endif /* row0mysql.h */
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 740771fa3eb..93dcf9cf49b 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,8 @@ row_purge_node_create(
/*==================*/
que_thr_t* parent, /*!< in: parent node, i.e., a
thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
+ mem_heap_t* heap) /*!< in: memory heap where created */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
@@ -56,19 +57,20 @@ is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
-this function first returns TRUE and then FALSE, if a user transaction
+this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return TRUE if the secondary index record can be purged */
+@return true if the secondary index record can be purged */
UNIV_INTERN
-ibool
+bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry); /*!< in: secondary index entry */
+ const dtuple_t* entry) /*!< in: secondary index entry */
+ __attribute__((nonnull, warn_unused_result));
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
@@ -77,11 +79,12 @@ UNIV_INTERN
que_thr_t*
row_purge_step(
/*===========*/
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/* Purge node structure */
-struct purge_node_struct{
+struct purge_node_t{
que_common_t common; /*!< node type: QUE_NODE_PURGE */
/*----------------------*/
/* Local storage for this graph node */
diff --git a/storage/innobase/include/row0quiesce.h b/storage/innobase/include/row0quiesce.h
new file mode 100644
index 00000000000..1d6d11291b8
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.h
+
+Header file for tablespace quiesce functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0quiesce_h
+#define row0quiesce_h
+
+#include "univ.i"
+#include "dict0types.h"
+
+struct trx_t;
+
+/** The version number of the export meta-data text file. */
+#define IB_EXPORT_CFG_VERSION_V1 0x1UL
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Set a table's quiesce state.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ ib_quiesce_t state, /*!< in: quiesce state to set */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Cleanup after table quiesce. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#endif /* row0quiesce_h */
diff --git a/storage/innobase/include/row0quiesce.ic b/storage/innobase/include/row0quiesce.ic
new file mode 100644
index 00000000000..f570a6aed05
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.ic
+
+Quiesce a tablespace.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
+
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index cf253ab2347..a4e5e0dd2fa 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -73,20 +73,41 @@ row_get_rec_roll_ptr(
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry_low(
+/*======================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged, or NULL if the
externally stored columns in the clustered index record are
unavailable and ext != NULL */
-UNIV_INTERN
+UNIV_INLINE
dtuple_t*
row_build_index_entry(
/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap); /*!< in: memory heap from which the memory for
- the index entry is allocated */
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@@ -124,11 +145,17 @@ row_build(
consulted instead; the user
columns in this table should be
the same columns as in index->table */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull(2,3,9)));
/*******************************************************************//**
Converts an index record to a typed data tuple.
@return index entry built; does not set info_bits, and the data fields
@@ -142,37 +169,25 @@ row_rec_to_index_entry_low(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
+@return own: index entry built */
UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
+ const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ const ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
@@ -193,8 +208,9 @@ row_build_row_ref(
the buffer page of this record must be
at least s-latched and the latch held
as long as the row reference is used! */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -215,7 +231,8 @@ row_build_row_ref_in_tuple(
const dict_index_t* index, /*!< in: secondary index */
ulint* offsets,/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction or NULL */
+ __attribute__((nonnull(1,2,3)));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -245,7 +262,8 @@ row_search_on_row_ref(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
const dict_table_t* table, /*!< in: table */
const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr); /*!< in/out: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
@@ -258,7 +276,8 @@ row_get_clust_rec(
const rec_t* rec, /*!< in: record in a secondary index */
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/** Result of row_search_index_entry */
enum row_search_result {
@@ -285,8 +304,8 @@ row_search_index_entry(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
- mtr_t* mtr); /*!< in: mtr */
-
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
@@ -313,8 +332,9 @@ row_raw_format(
in bytes */
const dict_field_t* dict_field, /*!< in: index field */
char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
+ ulint buf_size) /*!< in: output buffer size
in bytes */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0row.ic"
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 8e9f3460519..ac62422be1f 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -104,6 +104,33 @@ row_get_rec_roll_ptr(
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INLINE
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+{
+ dtuple_t* entry;
+
+ ut_ad(dtuple_check_typed(row));
+ entry = row_build_index_entry_low(row, ext, index, heap);
+ ut_ad(!entry || dtuple_check_typed(entry));
+ return(entry);
+}
+
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index fa3c93b6b9a..c8be80f89d9 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -148,7 +148,7 @@ position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
UNIV_INTERN
-ulint
+dberr_t
row_search_for_mysql(
/*=================*/
byte* buf, /*!< in/out: buffer for the fetched
@@ -163,11 +163,12 @@ row_search_for_mysql(
'mode' */
ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
ROW_SEL_EXACT_PREFIX */
- ulint direction); /*!< in: 0 or ROW_SEL_NEXT or
+ ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
ROW_SEL_PREV; NOTE: if this is != 0,
then prebuilt must have a pcur
with stored position! In opening of a
cursor 'direction' should be 0. */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Checks if MySQL at the moment is allowed for this table to retrieve a
consistent read result, or store it to the query cache.
@@ -179,28 +180,20 @@ row_search_check_if_query_cache_permitted(
trx_t* trx, /*!< in: transaction object */
const char* norm_name); /*!< in: concatenation of database name,
'/' char, table name */
-void
-row_create_key(
-/*===========*/
- dtuple_t* tuple, /* in: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- dict_index_t* index, /* in: index of the key value */
- doc_id_t* doc_id); /* in: doc id to lookup.*/
/*******************************************************************//**
Read the max AUTOINC value from an index.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
row_search_max_autoinc(
/*===================*/
dict_index_t* index, /*!< in: index to search */
const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value); /*!< out: AUTOINC value read */
+ ib_uint64_t* value) /*!< out: AUTOINC value read */
+ __attribute__((nonnull, warn_unused_result));
/** A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
+struct sel_buf_t{
byte* data; /*!< data, or NULL; if not NULL, this field
has allocated memory which must be explicitly
freed; can be != NULL even when len is
@@ -213,7 +206,7 @@ struct sel_buf_struct{
};
/** Query plan */
-struct plan_struct{
+struct plan_t{
dict_table_t* table; /*!< table struct in the dictionary
cache */
dict_index_t* index; /*!< table index used in the search */
@@ -299,7 +292,7 @@ enum sel_node_state {
};
/** Select statement node */
-struct sel_node_struct{
+struct sel_node_t{
que_common_t common; /*!< node type: QUE_NODE_SELECT */
enum sel_node_state
state; /*!< node state */
@@ -352,7 +345,7 @@ struct sel_node_struct{
};
/** Fetch statement node */
-struct fetch_node_struct{
+struct fetch_node_t{
que_common_t common; /*!< type: QUE_NODE_FETCH */
sel_node_t* cursor_def; /*!< cursor definition */
sym_node_t* into_list; /*!< variables to set */
@@ -379,7 +372,7 @@ enum open_node_op {
};
/** Open or close cursor statement node */
-struct open_node_struct{
+struct open_node_t{
que_common_t common; /*!< type: QUE_NODE_OPEN */
enum open_node_op
op_type; /*!< operation type: open or
@@ -388,7 +381,7 @@ struct open_node_struct{
};
/** Row printf statement node */
-struct row_printf_node_struct{
+struct row_printf_node_t{
que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
sel_node_t* sel_node; /*!< select */
};
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
index 463651b43b8..52c89cb01fa 100644
--- a/storage/innobase/include/row0types.h
+++ b/storage/innobase/include/row0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,32 +26,28 @@ Created 12/27/1996 Heikki Tuuri
#ifndef row0types_h
#define row0types_h
-typedef struct plan_struct plan_t;
+struct plan_t;
-typedef struct upd_struct upd_t;
+struct upd_t;
+struct upd_field_t;
+struct upd_node_t;
+struct del_node_t;
+struct ins_node_t;
+struct sel_node_t;
+struct open_node_t;
+struct fetch_node_t;
-typedef struct upd_field_struct upd_field_t;
+struct row_printf_node_t;
+struct sel_buf_t;
-typedef struct upd_node_struct upd_node_t;
+struct undo_node_t;
-typedef struct del_node_struct del_node_t;
+struct purge_node_t;
-typedef struct ins_node_struct ins_node_t;
+struct row_ext_t;
-typedef struct sel_node_struct sel_node_t;
-
-typedef struct open_node_struct open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct row_printf_node_t;
-typedef struct sel_buf_struct sel_buf_t;
-
-typedef struct undo_node_struct undo_node_t;
-
-typedef struct purge_node_struct purge_node_t;
-
-typedef struct row_ext_struct row_ext_t;
+/** Buffer for logging modifications during online index creation */
+struct row_log_t;
/* MySQL data types */
struct TABLE;
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
index 5f3a7212ee1..ebf4881208a 100644
--- a/storage/innobase/include/row0uins.h
+++ b/storage/innobase/include/row0uins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,11 +42,11 @@ if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
@return DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_undo_ins(
/*=========*/
- undo_node_t* node); /*!< in: row undo node */
-
+ undo_node_t* node) /*!< in: row undo node */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0uins.ic"
#endif
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
index 84831e59d90..f89d5a334fc 100644
--- a/storage/innobase/include/row0umod.h
+++ b/storage/innobase/include/row0umod.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,12 +38,12 @@ Created 2/27/1997 Heikki Tuuri
Undoes a modify operation on a row of a table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr); /*!< in: query thread */
-
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0umod.ic"
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 47f9afdc74a..5dddfb4eae1 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -95,7 +95,7 @@ enum undo_exec {
};
/** Undo node structure */
-struct undo_node_struct{
+struct undo_node_t{
que_common_t common; /*!< node type: QUE_NODE_UNDO */
enum undo_exec state; /*!< node execution state */
trx_t* trx; /*!< trx for which undo is done */
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index a7687bb1ded..27dedeb65a7 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -101,7 +101,7 @@ byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
@@ -118,8 +118,9 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
UNIV_INTERN
@@ -165,6 +166,15 @@ row_upd_changes_field_size_or_external(
dict_index_t* index, /*!< in: index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update);/*!< in: update vector */
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+ const upd_t* update) /*!< in: update vector */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Replaces the new column values stored in the update vector to the
@@ -192,11 +202,12 @@ UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
+ const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((warn_unused_result, nonnull));
/***************************************************************//**
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
@@ -204,14 +215,19 @@ the equal ordering fields. NOTE: we compare the fields as binary strings!
@return own: update vector of differing fields, excluding roll ptr and
trx id */
UNIV_INTERN
-upd_t*
+const upd_t*
row_upd_build_difference_binary(
/*============================*/
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* entry, /*!< in: entry to insert */
const rec_t* rec, /*!< in: clustered index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+ bool no_sys, /*!< in: skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR */
+ trx_t* trx, /*!< in: transaction (for diagnostics),
+ or NULL */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(1,2,3,7), warn_unused_result));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
@@ -315,25 +331,14 @@ row_upd_changes_fts_column(
upd_field_t* upd_field); /*!< in: field to check */
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
-@return TRUE if Doc ID column is affected */
+@return whether Doc ID column is affected */
UNIV_INTERN
-ulint
+bool
row_upd_changes_doc_id(
/*===================*/
dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field); /*!< in: field to check */
-/***********************************************************//**
-Checks if an update vector changes the table's FTS-indexed columns.
-NOTE: must not be called for tables which do not have an FTS-index.
-Also, the vector returned must be explicitly freed as it's allocated
-using the ut_malloc() allocator.
-@return vector of FTS indexes that were affected by the update else NULL */
-UNIV_INTERN
-ib_vector_t*
-row_upd_changes_fts_columns(
-/*========================*/
- dict_table_t* table, /*!< in: table */
- upd_t* update); /*!< in: update vector for the row */
+ upd_field_t* upd_field) /*!< in: field to check */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@@ -397,7 +402,7 @@ row_upd_index_parse(
/* Update vector field */
-struct upd_field_struct{
+struct upd_field_t{
unsigned field_no:16; /*!< field number in an index, usually
the clustered index, but in updating
a secondary index record in btr0cur.cc
@@ -416,7 +421,7 @@ struct upd_field_struct{
};
/* Update vector structure */
-struct upd_struct{
+struct upd_t{
ulint info_bits; /*!< new value of info bits to record;
default is 0 */
ulint n_fields; /*!< number of update fields */
@@ -427,7 +432,7 @@ struct upd_struct{
/* Update node structure which also implements the delete operation
of a row */
-struct upd_node_struct{
+struct upd_node_t{
que_common_t common; /*!< node type: QUE_NODE_UPDATE */
ibool is_delete;/* TRUE if delete, FALSE if update */
ibool searched_update;
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index d054662c080..618a77fa4bf 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,6 @@ upd_create(
update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
- update->info_bits = 0;
update->n_fields = n;
update->fields = (upd_field_t*)
mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
@@ -111,6 +110,7 @@ upd_field_set_field_no(
fprintf(stderr, "\n"
"InnoDB: but index only has %lu fields\n",
(ulong) dict_index_get_n_fields(index));
+ ut_ad(0);
}
dict_col_copy_type(dict_index_get_nth_col(index, field_no),
@@ -152,8 +152,9 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
{
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -172,8 +173,14 @@ row_upd_rec_sys_fields(
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
#endif
- ut_ad(lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
- rec, index, offsets));
+ /* During IMPORT the trx id in the record can be in the
+ future, if the .ibd file is being imported from another
+ instance. During IMPORT roll_ptr will be 0. */
+ ut_ad(roll_ptr == 0
+ || lock_check_trx_id_sanity(
+ trx_read_trx_id(rec + offset),
+ rec, index, offsets));
+
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
}
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index d9e3471b3dc..1df5b4d3e98 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -87,7 +87,7 @@ read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
-ulint
+dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -106,16 +106,17 @@ row_vers_build_for_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers);/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
+ rec_t** old_vers)/*!< out, own: old version, or NULL
+ if the history is missing or the record
+ does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5,6,7)));
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+which should be seen by a semi-consistent read. */
UNIV_INTERN
-ulint
+void
row_vers_build_for_semi_consistent_read(
/*====================================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -132,9 +133,10 @@ row_vers_build_for_semi_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- const rec_t** old_vers);/*!< out: rec, old version, or NULL if the
+ const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5)));
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 5e47f82f416..48d4b94dcae 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -1,6 +1,7 @@
/***********************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -55,7 +56,7 @@ fill in counter information as described in "monitor_info_t" and
create the internal counter ID in "monitor_id_t". */
/** Structure containing the actual values of a monitor counter. */
-struct monitor_value_struct {
+struct monitor_value_t {
ib_time_t mon_start_time; /*!< Start time of monitoring */
ib_time_t mon_stop_time; /*!< Stop time of monitoring */
ib_time_t mon_reset_time; /*!< Time counter resetted */
@@ -70,11 +71,9 @@ struct monitor_value_struct {
monitor_running_t mon_status; /* whether monitor still running */
};
-typedef struct monitor_value_struct monitor_value_t;
-
/** Follwoing defines are possible values for "monitor_type" field in
"struct monitor_info" */
-enum monitor_type_value {
+enum monitor_type_t {
MONITOR_NONE = 0, /*!< No monitoring */
MONITOR_MODULE = 1, /*!< This is a monitor module type,
not a counter */
@@ -97,8 +96,6 @@ enum monitor_type_value {
metrics table */
};
-typedef enum monitor_type_value monitor_type_t;
-
/** Counter minimum value is initialized to be max value of
mon_type_t (ib_int64_t) */
#define MIN_RESERVED ((mon_type_t) (IB_ULONGLONG_MAX >> 1))
@@ -117,7 +114,7 @@ name shall start with MONITOR_OVLD
Please refer to "innodb_counter_info" in srv/srv0mon.cc for detail
information for each monitor counter */
-enum monitor_id_value {
+enum monitor_id_t {
/* This is to identify the default value set by the metrics
control global variables */
MONITOR_DEFAULT_START = 0,
@@ -154,14 +151,15 @@ enum monitor_id_value {
MONITOR_OVLD_BUF_POOL_READS,
MONITOR_OVLD_BUF_POOL_READ_REQUESTS,
MONITOR_OVLD_BUF_POOL_WRITE_REQUEST,
- MONITOR_PAGE_INFLUSH,
MONITOR_OVLD_BUF_POOL_WAIT_FREE,
MONITOR_OVLD_BUF_POOL_READ_AHEAD,
MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED,
MONITOR_OVLD_BUF_POOL_PAGE_TOTAL,
MONITOR_OVLD_BUF_POOL_PAGE_MISC,
MONITOR_OVLD_BUF_POOL_PAGES_DATA,
+ MONITOR_OVLD_BUF_POOL_BYTES_DATA,
MONITOR_OVLD_BUF_POOL_PAGES_DIRTY,
+ MONITOR_OVLD_BUF_POOL_BYTES_DIRTY,
MONITOR_OVLD_BUF_POOL_PAGES_FREE,
MONITOR_OVLD_PAGE_CREATED,
MONITOR_OVLD_PAGES_WRITTEN,
@@ -177,15 +175,15 @@ enum monitor_id_value {
MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
MONITOR_FLUSH_NEIGHBOR_COUNT,
MONITOR_FLUSH_NEIGHBOR_PAGES,
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_PAGES,
+ MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+ MONITOR_FLUSH_AVG_PAGE_RATE,
+ MONITOR_FLUSH_LSN_AVG_RATE,
+ MONITOR_FLUSH_PCT_FOR_DIRTY,
+ MONITOR_FLUSH_PCT_FOR_LSN,
+ MONITOR_FLUSH_SYNC_WAITS,
MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
MONITOR_FLUSH_ADAPTIVE_COUNT,
MONITOR_FLUSH_ADAPTIVE_PAGES,
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_PAGES,
MONITOR_FLUSH_SYNC_TOTAL_PAGE,
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES,
@@ -303,6 +301,8 @@ enum monitor_id_value {
MONITOR_MODULE_PAGE,
MONITOR_PAGE_COMPRESS,
MONITOR_PAGE_DECOMPRESS,
+ MONITOR_PAD_INCREMENTS,
+ MONITOR_PAD_DECREMENTS,
/* Index related counters */
MONITOR_MODULE_INDEX,
@@ -367,7 +367,10 @@ enum monitor_id_value {
/* Data DDL related counters */
MONITOR_MODULE_DDL_STATS,
+ MONITOR_BACKGROUND_DROP_INDEX,
MONITOR_BACKGROUND_DROP_TABLE,
+ MONITOR_ONLINE_CREATE_INDEX,
+ MONITOR_PENDING_ALTER_TABLE,
MONITOR_MODULE_ICP,
MONITOR_ICP_ATTEMPTS,
@@ -383,8 +386,6 @@ enum monitor_id_value {
NUM_MONITOR
};
-typedef enum monitor_id_value monitor_id_t;
-
/** This informs the monitor control system to turn
on/off and reset monitor counters through wild card match */
#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1)
@@ -394,7 +395,7 @@ on/off and reset monitor counters through wild card match */
/** struct monitor_info describes the basic/static information
about each monitor counter. */
-struct monitor_info_struct {
+struct monitor_info_t {
const char* monitor_name; /*!< Monitor name */
const char* monitor_module; /*!< Sub Module the monitor
belongs to */
@@ -408,12 +409,10 @@ struct monitor_info_struct {
monitor_id_t */
};
-typedef struct monitor_info_struct monitor_info_t;
-
/** Following are the "set_option" values allowed for
srv_mon_process_existing_counter() and related monitor control
functions. To turn on/off/reset the monitor counters. */
-enum mon_set_option {
+enum mon_option_t {
MONITOR_TURN_ON = 1, /*!< Turn on the counter */
MONITOR_TURN_OFF, /*!< Turn off the counter */
MONITOR_RESET_VALUE, /*!< Reset current values */
@@ -423,8 +422,6 @@ enum mon_set_option {
function */
};
-typedef enum mon_set_option mon_option_t;
-
/** Number of bit in a ulint datatype */
#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT)
@@ -533,8 +530,37 @@ on the counters */
} \
}
-#ifdef HAVE_ATOMIC_BUILTINS
+/** Increment a monitor counter under mutex protection.
+Use MONITOR_INC if appropriate mutex protection already exists.
+@param monitor monitor to be incremented by 1
+@param mutex mutex to acquire and release */
+# define MONITOR_MUTEX_INC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+/** Decrement a monitor counter under mutex protection.
+Use MONITOR_DEC if appropriate mutex protection already exists.
+@param monitor monitor to be decremented by 1
+@param mutex mutex to acquire and release */
+# define MONITOR_MUTEX_DEC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+#if defined HAVE_ATOMIC_BUILTINS_64
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+@param monitor monitor to be incremented by 1 */
# define MONITOR_ATOMIC_INC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
ib_uint64_t value; \
@@ -547,10 +573,13 @@ on the counters */
} \
}
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+@param monitor monitor to be decremented by 1 */
# define MONITOR_ATOMIC_DEC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
ib_uint64_t value; \
- value = os_atomic_decrement_ulint( \
+ value = os_atomic_decrement_uint64( \
(ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
@@ -558,7 +587,34 @@ on the counters */
MONITOR_MIN_VALUE(monitor) = value; \
} \
}
-#endif /* HAVE_ATOMIC_BUILTINS */
+# define srv_mon_create() ((void) 0)
+# define srv_mon_free() ((void) 0)
+#else /* HAVE_ATOMIC_BUILTINS_64 */
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+extern ib_mutex_t monitor_mutex;
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void);
+/*================*/
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void);
+/*==============*/
+
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+@param monitor monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor) MONITOR_MUTEX_INC(&monitor_mutex, monitor)
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+@param monitor monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor) MONITOR_MUTEX_DEC(&monitor_mutex, monitor)
+#endif /* HAVE_ATOMIC_BUILTINS_64 */
#define MONITOR_DEC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
@@ -568,7 +624,17 @@ on the counters */
} \
}
+#ifdef UNIV_DEBUG_VALGRIND
+# define MONITOR_CHECK_DEFINED(value) do { \
+ mon_type_t m = value; \
+ UNIV_MEM_ASSERT_RW(&m, sizeof m); \
+} while (0)
+#else /* UNIV_DEBUG_VALGRIND */
+# define MONITOR_CHECK_DEFINED(value) (void) 0
+#endif /* UNIV_DEBUG_VALGRIND */
+
#define MONITOR_INC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) += (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -577,6 +643,7 @@ on the counters */
}
#define MONITOR_DEC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
	ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value)); \
MONITOR_VALUE(monitor) -= (mon_type_t) (value); \
@@ -605,6 +672,7 @@ could already be checked as a module group */
/** Directly set a monitor counter's value */
#define MONITOR_SET(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -617,9 +685,10 @@ could already be checked as a module group */
/** Add time difference between now and input "value" (in seconds) to the
monitor counter
-@monitor monitor to update for the time difference
-@value the start time value */
+@param monitor monitor to update for the time difference
+@param value the start time value */
#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
ullint old_time = (value); \
value = ut_time_us(NULL); \
@@ -629,15 +698,16 @@ monitor counter
/** This macro updates 3 counters in one call. However, it only checks the
main/first monitor counter 'monitor', to see it is on or off to decide
whether to do the update.
-@monitor the main monitor counter to update. It accounts for
+@param monitor the main monitor counter to update. It accounts for
the accumulative value for the counter.
-@monitor_n_calls counter that counts number of times this macro is
+@param monitor_n_calls counter that counts number of times this macro is
called
-@monitor_per_call counter that records the current and max value of
+@param monitor_per_call counter that records the current and max value of
each incremental value
-@value incremental value to record this time */
+@param value incremental value to record this time */
#define MONITOR_INC_VALUE_CUMULATIVE( \
monitor, monitor_n_calls, monitor_per_call, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor_n_calls)++; \
MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value); \
@@ -655,6 +725,7 @@ whether to do the update.
/** Directly set a monitor counter's value, and if the value
is monotonically increasing, only max value needs to be updated */
#define MONITOR_SET_UPD_MAX_ONLY(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -665,6 +736,7 @@ is monotonically increasing, only max value needs to be updated */
/** Some values such as log sequence number are monotonically increasing
number, do not need to record max/min values */
#define MONITOR_SET_SIMPLE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
}
@@ -693,9 +765,11 @@ consolidate information from existing system status variables. */
/** Save the passed-in value to mon_start_value field of monitor
counters */
-#define MONITOR_SAVE_START(monitor, value) \
+#define MONITOR_SAVE_START(monitor, value) do { \
+ MONITOR_CHECK_DEFINED(value); \
(MONITOR_START_VALUE(monitor) = \
- (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor))
+ (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)); \
+ } while (0)
/** Save the passed-in value to mon_last_value field of monitor
counters */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 99cff251e3c..201f19c0cd8 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -50,22 +50,91 @@ Created 10/10/1995 Heikki Tuuri
#include "trx0types.h"
#include "srv0conc.h"
#include "buf0checksum.h"
+#include "ut0counter.h"
+
+/* Global counters used inside InnoDB. */
+struct srv_stats_t {
+ typedef ib_counter_t<lsn_t, 1, single_indexer_t> lsn_ctr_1_t;
+ typedef ib_counter_t<ulint, 1, single_indexer_t> ulint_ctr_1_t;
+ typedef ib_counter_t<lint, 1, single_indexer_t> lint_ctr_1_t;
+ typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
+ typedef ib_counter_t<ib_int64_t, 1, single_indexer_t> ib_int64_ctr_1_t;
+
+ /** Count the amount of data written in total (in bytes) */
+ ulint_ctr_1_t data_written;
+
+ /** Number of the log write requests done */
+ ulint_ctr_1_t log_write_requests;
+
+ /** Number of physical writes to the log performed */
+ ulint_ctr_1_t log_writes;
+
+ /** Amount of data written to the log files in bytes */
+ lsn_ctr_1_t os_log_written;
+
+ /** Number of writes being done to the log files */
+ lint_ctr_1_t os_log_pending_writes;
+
+ /** We increase this counter, when we don't have enough
+ space in the log buffer and have to flush it */
+ ulint_ctr_1_t log_waits;
+
+ /** Count the number of times the doublewrite buffer was flushed */
+ ulint_ctr_1_t dblwr_writes;
+
+ /** Store the number of pages that have been flushed to the
+ doublewrite buffer */
+ ulint_ctr_1_t dblwr_pages_written;
+
+ /** Store the number of write requests issued */
+ ulint_ctr_1_t buf_pool_write_requests;
+
+ /** Store the number of times when we had to wait for a free page
+ in the buffer pool. It happens when the buffer pool is full and we
+ need to make a flush, in order to be able to read or create a page. */
+ ulint_ctr_1_t buf_pool_wait_free;
+
+ /** Count the number of pages that were written from buffer
+ pool to the disk */
+ ulint_ctr_1_t buf_pool_flushed;
+
+ /** Number of buffer pool reads that led to the reading of
+ a disk page */
+ ulint_ctr_1_t buf_pool_reads;
+
+ /** Number of data read in total (in bytes) */
+ ulint_ctr_1_t data_read;
+
+ /** Wait time of database locks */
+ ib_int64_ctr_1_t n_lock_wait_time;
+
+ /** Number of database lock waits */
+ ulint_ctr_1_t n_lock_wait_count;
+
+ /** Number of threads currently waiting on database locks */
+ lint_ctr_1_t n_lock_wait_current_count;
+
+ /** Number of rows read. */
+ ulint_ctr_64_t n_rows_read;
+
+ /** Number of rows updated */
+ ulint_ctr_64_t n_rows_updated;
+
+ /** Number of rows deleted */
+ ulint_ctr_64_t n_rows_deleted;
+
+ /** Number of rows inserted */
+ ulint_ctr_64_t n_rows_inserted;
+};
extern const char* srv_main_thread_op_info;
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
extern const char srv_mysql50_table_name_prefix[10];
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t srv_lock_timeout_thread_event;
-
/* The monitor thread waits on this event. */
extern os_event_t srv_monitor_event;
-/* The lock timeout thread waits on this event. */
-extern os_event_t srv_timeout_event;
-
/* The error monitor thread waits on this event. */
extern os_event_t srv_error_event;
@@ -89,20 +158,20 @@ at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-/* Mutex for locking srv_monitor_file */
-extern mutex_t srv_monitor_file_mutex;
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+extern ib_mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
+/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-extern mutex_t srv_dict_tmpfile_mutex;
+extern ib_mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
+/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-extern mutex_t srv_misc_tmpfile_mutex;
+extern ib_mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellanous diagnostic output */
extern FILE* srv_misc_tmpfile;
@@ -114,6 +183,10 @@ extern char* srv_data_home;
extern char* srv_arch_dir;
#endif /* UNIV_LOG_ARCHIVE */
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+extern my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
@@ -134,8 +207,10 @@ extern ulint srv_max_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
-/* Variable specifying the FTS parallel sort buffer size */
+/** Sort buffer size in index creation */
extern ulong srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+extern unsigned long long srv_online_max_size;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -153,6 +228,9 @@ extern char* srv_undo_dir;
/** Number of undo tablespaces to use. */
extern ulong srv_undo_tablespaces;
+/** The number of UNDO tablespaces that are open and ready to use. */
+extern ulint srv_undo_tablespaces_open;
+
/* The number of undo segments to use */
extern ulong srv_undo_logs;
@@ -163,17 +241,20 @@ extern ulint* srv_data_file_is_raw_partition;
extern ibool srv_auto_extend_last_data_file;
extern ulint srv_last_file_size_max;
-extern char** srv_log_group_home_dirs;
+extern char* srv_log_group_home_dir;
#ifndef UNIV_HOTBACKUP
extern ulong srv_auto_extend_increment;
extern ibool srv_created_new_raw;
-extern ulint srv_n_log_groups;
-extern ulint srv_n_log_files;
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
+extern ulong srv_n_log_files;
extern ib_uint64_t srv_log_file_size;
+extern ib_uint64_t srv_log_file_size_requested;
extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
+extern uint srv_flush_log_at_timeout;
extern char srv_adaptive_flushing;
/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
@@ -195,7 +276,7 @@ extern ulong srv_n_page_hash_locks; /*!< number of locks to
protect buf_pool->page_hash */
extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
flush batch */
-extern my_bool srv_flush_neighbors; /*!< whether or not to flush
+extern ulong srv_flush_neighbors; /*!< whether or not to flush
neighbors of a block */
extern ulint srv_buf_pool_old_size; /*!< previously requested size */
extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
@@ -210,6 +291,12 @@ extern ulint srv_n_write_io_threads;
/* Number of IO operations per second the server can do */
extern ulong srv_io_capacity;
+
+/* We use this dummy default value at startup for max_io_capacity.
+The real value is set based on the value of io_capacity. */
+#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL)
+#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL)
+extern ulong srv_max_io_capacity;
/* Returns the number of IO operations that is X percent of the
capacity. PCT_IO(5) -> returns the number of IO operations that
is 5% of the max where max is srv_io_capacity. */
@@ -232,9 +319,16 @@ extern ulint srv_win_file_flush_method;
extern ulint srv_max_n_open_files;
-extern ulint srv_max_dirty_pages_pct;
+extern ulong srv_max_dirty_pages_pct;
+extern ulong srv_max_dirty_pages_pct_lwm;
+
+extern ulong srv_adaptive_flushing_lwm;
+extern ulong srv_flushing_avg_loops;
-extern ulint srv_force_recovery;
+extern ulong srv_force_recovery;
+#ifndef DBUG_OFF
+extern ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
purge and index buffer merge.
@@ -246,7 +340,9 @@ extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
extern ibool srv_innodb_status;
extern unsigned long long srv_stats_transient_sample_pages;
+extern my_bool srv_stats_persistent;
extern unsigned long long srv_stats_persistent_sample_pages;
+extern my_bool srv_stats_auto_recalc;
extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
@@ -259,11 +355,6 @@ extern ulong srv_max_purge_lag_delay;
extern ulong srv_replication_delay;
/*-------------------------------------------*/
-extern ulint srv_n_rows_inserted;
-extern ulint srv_n_rows_updated;
-extern ulint srv_n_rows_deleted;
-extern ulint srv_n_rows_read;
-
extern ibool srv_print_innodb_monitor;
extern ibool srv_print_innodb_lock_monitor;
extern ibool srv_print_innodb_tablespace_monitor;
@@ -274,21 +365,21 @@ extern ibool srv_print_verbose_log;
"tables instead, see " REFMAN "innodb-i_s-tables.html"
extern ibool srv_print_innodb_table_monitor;
-extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active;
/* TRUE during the lifetime of the buffer pool dump/load thread */
extern ibool srv_buf_dump_thread_active;
+/* TRUE during the lifetime of the stats thread */
+extern ibool srv_dict_stats_thread_active;
+
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
extern ulong srv_spin_wait_delay;
extern ibool srv_priority_boost;
-extern ulint srv_n_lock_wait_count;
-
extern ulint srv_truncated_status_writes;
extern ulint srv_available_undo_logs;
@@ -309,12 +400,21 @@ extern ibool srv_print_latch_waits;
# define srv_print_latch_waits FALSE
#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+extern my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+#ifdef UNIV_DEBUG
+extern my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
extern ulint srv_fatal_semaphore_wait_threshold;
+#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
extern ulint srv_dml_needed_delay;
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
-extern mutex_t server_mutex;
+extern ib_mutex_t server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */
#define SRV_MAX_N_IO_THREADS 130
@@ -324,22 +424,6 @@ i/o handler thread */
extern const char* srv_io_thread_op_info[];
extern const char* srv_io_thread_function[];
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
-
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
-
-/* amount of data written to the log files in bytes */
-extern lsn_t srv_os_log_written;
-
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
-
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulong srv_n_purge_threads;
@@ -349,50 +433,16 @@ extern ulong srv_purge_batch_size;
/* the number of sync wait arrays */
extern ulong srv_sync_array_size;
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
-
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
-
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
-
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
-
/* print all user-level transactions deadlocks to mysqld stderr */
extern my_bool srv_print_all_deadlocks;
-/** Status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-/** Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
-
-/** Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
+extern my_bool srv_cmp_per_index_enabled;
/** Status variables to be passed to MySQL */
-extern export_struc export_vars;
+extern struct export_var_t export_vars;
+
+/** Global counters */
+extern srv_stats_t srv_stats;
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
@@ -404,19 +454,20 @@ extern mysql_pfs_key_t srv_error_monitor_thread_key;
extern mysql_pfs_key_t srv_monitor_thread_key;
extern mysql_pfs_key_t srv_master_thread_key;
extern mysql_pfs_key_t srv_purge_thread_key;
+extern mysql_pfs_key_t recv_writer_thread_key;
/* This macro register the current thread and its key with performance
schema */
# define pfs_register_thread(key) \
do { \
- struct PSI_thread* psi = PSI_CALL(new_thread)(key, NULL, 0);\
- PSI_CALL(set_thread)(psi); \
+ struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ PSI_THREAD_CALL(set_thread)(psi); \
} while (0)
/* This macro delist the current thread from performance schema */
# define pfs_delete_thread() \
do { \
- PSI_CALL(delete_current_thread)(); \
+ PSI_THREAD_CALL(delete_current_thread)(); \
} while (0)
# endif /* UNIV_PFS_THREAD */
@@ -439,8 +490,19 @@ enum {
when writing data files, but do flush
after writing to log files */
SRV_UNIX_NOSYNC, /*!< do not flush after writing */
- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
- data files */
+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
+ data files. This implies using
+ non-buffered IO but still using fsync,
+ the reason for which is that some FS
+ do not flush meta-data when
+ unbuffered IO happens */
+ SRV_UNIX_O_DIRECT_NO_FSYNC
+ /*!< do not use fsync() when using
+ direct IO i.e.: it can be set to avoid
+ the fsync() call that we make when
+ using SRV_UNIX_O_DIRECT. However, in
+ this case user/DBA should be sure about
+ the integrity of the meta-data */
};
/** Alternatives for file i/o in Windows */
@@ -499,10 +561,9 @@ enum srv_thread_type {
};
/*********************************************************************//**
-Boots Innobase server.
-@return DB_SUCCESS or error code */
+Boots Innobase server. */
UNIV_INTERN
-ulint
+void
srv_boot(void);
/*==========*/
/*********************************************************************//**
@@ -533,6 +594,12 @@ srv_set_io_thread_op_info(
ulint i, /*!< in: the 'segment' of the i/o thread */
const char* str); /*!< in: constant char string describing the
state */
+/*********************************************************************//**
+Resets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info();
+/*=========================*/
/*******************************************************************//**
Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note
@@ -714,7 +781,7 @@ srv_purge_wakeup(void);
/*==================*/
/** Status variables to be passed to MySQL */
-struct export_var_struct{
+struct export_var_t{
ulint innodb_data_pending_reads; /*!< Pending reads */
ulint innodb_data_pending_writes; /*!< Pending writes */
ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
@@ -727,7 +794,9 @@ struct export_var_struct{
char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
+ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
+ ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
	ulint innodb_buffer_pool_pages_misc;	/*!< Miscellaneous pages */
ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
@@ -771,10 +840,15 @@ struct export_var_struct{
ulint innodb_num_open_files; /*!< fil_n_file_opened */
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
+#ifdef UNIV_DEBUG
+ ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
+ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
+ - purged view's min trx_id */
+#endif /* UNIV_DEBUG */
};
/** Thread slot in the thread table. */
-struct srv_slot_struct{
+struct srv_slot_t{
srv_thread_type type; /*!< thread type: user,
utility etc. */
ibool in_use; /*!< TRUE if this slot
@@ -803,6 +877,7 @@ struct srv_slot_struct{
# define srv_use_native_aio FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
+# define srv_reset_io_thread_op_info() ((void) 0)
# define srv_is_being_started 0
# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 9d948675011..e136f30f96a 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -83,24 +83,50 @@ Starts Innobase and creates a new database if database files
are not found and the user wants.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_start_or_create_for_mysql(void);
/*====================================*/
/****************************************************************//**
Shuts down the Innobase database.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_shutdown_for_mysql(void);
/********************************************************************
Signal all per-table background threads to shutdown, and wait for them to do
so. */
-
+UNIV_INTERN
void
srv_shutdown_table_bg_threads(void);
-
/*=============================*/
+
+/*************************************************************//**
+Copy the file path component of the physical file to parameter. It will
+copy up to and including the terminating path separator.
+@return number of bytes copied or ULINT_UNDEFINED if destination buffer
+ is smaller than the path to be copied. */
+UNIV_INTERN
+ulint
+srv_path_copy(
+/*==========*/
+ char* dest, /*!< out: destination buffer */
+ ulint dest_len, /*!< in: max bytes to copy */
+ const char* basedir, /*!< in: base directory */
+ const char* table_name) /*!< in: source table name */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+ __attribute__((nonnull));
+
/** Log sequence number at shutdown */
extern lsn_t srv_shutdown_lsn;
/** Log sequence number immediately after startup */
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 56f9ff78c49..bb4d1037a62 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -32,9 +32,9 @@ Created 9/5/1995 Heikki Tuuri
#include "os0thread.h"
/** Synchronization wait array cell */
-typedef struct sync_cell_struct sync_cell_t;
+struct sync_cell_t;
/** Synchronization wait array */
-typedef struct sync_array_struct sync_array_t;
+struct sync_array_t;
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index b0c21d0c76b..c268098d1ea 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -36,6 +36,7 @@ Created 9/11/1995 Heikki Tuuri
#include "univ.i"
#ifndef UNIV_HOTBACKUP
#include "ut0lst.h"
+#include "ut0counter.h"
#include "sync0sync.h"
#include "os0sync.h"
@@ -44,6 +45,43 @@ in MySQL: */
#undef rw_lock_t
#endif /* !UNIV_HOTBACKUP */
+/** Counters for RW locks. */
+struct rw_lock_stats_t {
+ typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+
+ /** number of spin waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_os_wait_count;
+
+ /** number of unlocks (that unlock shared locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_s_exit_count;
+
+ /** number of spin waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_os_wait_count;
+
+ /** number of unlocks (that unlock exclusive locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_x_exit_count;
+};
+
/* Latch types; these are used also in btr0btr.h: keep the numerical values
smaller than 30 and the order of the numerical values like below! */
#define RW_S_LATCH 1
@@ -57,22 +95,22 @@ of concurrent read locks before the rw_lock breaks. The current value of
0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
#define X_LOCK_DECR 0x00100000
-typedef struct rw_lock_struct rw_lock_t;
+struct rw_lock_t;
#ifdef UNIV_SYNC_DEBUG
-typedef struct rw_lock_debug_struct rw_lock_debug_t;
+struct rw_lock_debug_t;
#endif /* UNIV_SYNC_DEBUG */
typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
extern rw_lock_list_t rw_lock_list;
-extern mutex_t rw_lock_list_mutex;
+extern ib_mutex_t rw_lock_list_mutex;
#ifdef UNIV_SYNC_DEBUG
/* The global mutex which protects debug info lists of all rw-locks.
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-extern mutex_t rw_lock_debug_mutex;
+extern ib_mutex_t rw_lock_debug_mutex;
extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does
not get immediately the mutex it
may wait for this event */
@@ -80,30 +118,8 @@ extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if
there may be waiters for the event */
#endif /* UNIV_SYNC_DEBUG */
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_round_count;
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_s_exit_count;
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_s_os_wait_count;
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_round_count;
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_x_os_wait_count;
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_x_exit_count;
+/** Counters for RW locks. */
+extern rw_lock_stats_t rw_lock_stats;
#ifdef UNIV_PFS_RWLOCK
/* Following are rwlock keys used to register with MySQL
@@ -121,10 +137,10 @@ extern mysql_pfs_key_t checkpoint_lock_key;
extern mysql_pfs_key_t fil_space_latch_key;
extern mysql_pfs_key_t fts_cache_rw_lock_key;
extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
-extern mysql_pfs_key_t index_tree_rw_lock_key;
extern mysql_pfs_key_t trx_i_s_cache_lock_key;
extern mysql_pfs_key_t trx_purge_latch_key;
extern mysql_pfs_key_t index_tree_rw_lock_key;
+extern mysql_pfs_key_t index_online_log_key;
extern mysql_pfs_key_t dict_table_stats_latch_key;
extern mysql_pfs_key_t trx_sys_rw_lock_key;
extern mysql_pfs_key_t hash_table_rw_lock_key;
@@ -159,6 +175,9 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_s_lock_inline(M, P, F, L) \
+ rw_lock_s_lock_func((M), (P), (F), (L))
+
# define rw_lock_s_lock_gen(M, P) \
rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
@@ -175,12 +194,18 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock(M) \
rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_x_lock_inline(M, P, F, L) \
+ rw_lock_x_lock_func((M), (P), (F), (L))
+
# define rw_lock_x_lock_gen(M, P) \
rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
+ rw_lock_x_lock_func_nowait((M), (F), (L))
+
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
# else
@@ -212,6 +237,9 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_s_lock_inline(M, P, F, L) \
+ pfs_rw_lock_s_lock_func((M), (P), (F), (L))
+
# define rw_lock_s_lock_gen(M, P) \
pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
@@ -227,12 +255,18 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock(M) \
pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_x_lock_inline(M, P, F, L) \
+ pfs_rw_lock_x_lock_func((M), (P), (F), (L))
+
# define rw_lock_x_lock_gen(M, P) \
pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
+ pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
+
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
# else
@@ -367,30 +401,6 @@ rw_lock_x_unlock_func(
been passed to another thread to unlock */
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
-
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
@@ -558,7 +568,7 @@ shared locks are allowed. To prevent starving of a writer blocked by
readers, a writer may queue for x-lock by decrementing lock_word: no
new readers will be let in while the thread waits for readers to
exit. */
-struct rw_lock_struct {
+struct rw_lock_t {
volatile lint lock_word;
/*!< Holds the state of the lock. */
volatile ulint waiters;/*!< 1: there are waiters */
@@ -583,7 +593,7 @@ struct rw_lock_struct {
/*!< Event for next-writer to wait on. A thread
must decrement lock_word before waiting. */
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t mutex; /*!< The mutex protecting rw_lock_struct */
+ ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
UT_LIST_NODE_T(rw_lock_t) list;
@@ -615,7 +625,7 @@ struct rw_lock_struct {
unsigned last_x_line:14; /*!< Line number where last time x-locked */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-/** Value of rw_lock_struct::magic_n */
+/** Value of rw_lock_t::magic_n */
#define RW_LOCK_MAGIC_N 22643
#endif /* UNIV_DEBUG */
};
@@ -623,7 +633,7 @@ struct rw_lock_struct {
#ifdef UNIV_SYNC_DEBUG
/** The structure for storing debug info of an rw-lock. All access to this
structure must be protected by rw_lock_debug_mutex_enter(). */
-struct rw_lock_debug_struct {
+struct rw_lock_debug_t {
os_thread_id_t thread_id; /*!< The thread id of the thread which
locked the rw-lock */
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index eab89e2619e..8786ad84643 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -90,7 +90,7 @@ rw_lock_set_waiter_flag(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 0, 1);
+ (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
lock->waiters = 1;
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
@@ -107,7 +107,7 @@ rw_lock_reset_waiter_flag(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 1, 0);
+ (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
lock->waiters = 0;
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
@@ -128,7 +128,7 @@ rw_lock_get_writer(
/* return NOT_LOCKED in s-lock state, like the writer
member of the old lock implementation. */
return(RW_LOCK_NOT_LOCKED);
- } else if (((-lock_word) % X_LOCK_DECR) == 0) {
+ } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
return(RW_LOCK_EX);
} else {
ut_ad(lock_word > -X_LOCK_DECR);
@@ -158,7 +158,7 @@ rw_lock_get_reader_count(
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
rw_lock_get_mutex(
/*==============*/
rw_lock_t* lock)
@@ -178,11 +178,10 @@ rw_lock_get_x_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_copy = lock->lock_word;
- /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
- if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
return(0);
}
- return(((-lock_copy) / X_LOCK_DECR) + 1);
+ return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
}
/******************************************************************//**
@@ -325,58 +324,6 @@ rw_lock_s_lock_low(
}
/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- /* Indicate there is a new reader by decrementing lock_word */
- lock->lock_word--;
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
-#endif
-}
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(rw_lock_validate(lock));
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- lock->lock_word -= X_LOCK_DECR;
- lock->writer_thread = os_thread_get_curr_id();
- lock->recursive = TRUE;
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-}
-
-/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in shared mode for the current thread. If the rw-lock is locked
in exclusive mode, or there is an exclusive lock request waiting, the
@@ -458,10 +405,11 @@ rw_lock_x_lock_func_nowait(
/* Relock: this lock_word modification is safe since no other
threads can modify (lock, unlock, or reserve) lock_word while
there is an exclusive writer and this is the writer thread. */
- lock->lock_word -= X_LOCK_DECR;
-
- /* Recursive x-locks must be multiples of X_LOCK_DECR. */
- ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+ if (lock->lock_word == 0) {
+ lock->lock_word = -X_LOCK_DECR;
+ } else {
+ lock->lock_word--;
+ }
/* Watch for too many recursive locks */
ut_ad(lock->lock_word < 0);
@@ -494,7 +442,9 @@ rw_lock_s_unlock_func(
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+ ut_ad(lock->lock_word > -X_LOCK_DECR);
+ ut_ad(lock->lock_word != 0);
+ ut_ad(lock->lock_word < X_LOCK_DECR);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
@@ -530,7 +480,7 @@ rw_lock_x_unlock_func(
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+ ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
/* lock->recursive flag also indicates if lock->writer_thread is
valid or stale. If we are the last of the recursive callers
@@ -541,15 +491,23 @@ rw_lock_x_unlock_func(
if (lock->lock_word == 0) {
/* Last caller in a possible recursive chain. */
lock->recursive = FALSE;
- UNIV_MEM_INVALID(&lock->writer_thread,
- sizeof lock->writer_thread);
}
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
#endif
- if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+ ulint x_lock_incr;
+ if (lock->lock_word == 0) {
+ x_lock_incr = X_LOCK_DECR;
+ } else if (lock->lock_word == -X_LOCK_DECR) {
+ x_lock_incr = X_LOCK_DECR;
+ } else {
+ ut_ad(lock->lock_word < -X_LOCK_DECR);
+ x_lock_incr = 1;
+ }
+
+ if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
/* Lock is now free. May have to signal read/write waiters.
We do not need to signal wait_ex waiters, since they cannot
exist when there is a writer. */
@@ -590,7 +548,7 @@ pfs_rw_lock_create_func(
ulint cline) /*!< in: file line where created */
{
/* Initialize the rwlock for performance schema */
- lock->pfs_psi = PSI_CALL(init_rwlock)(key, lock);
+ lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
/* The actual function to initialize an rwlock */
rw_lock_create_func(lock,
@@ -623,13 +581,13 @@ pfs_rw_lock_x_lock_func(
PSI_rwlock_locker_state state;
/* Record the entry of rw x lock request in performance schema */
- locker = PSI_CALL(start_rwlock_wrwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
rw_lock_x_lock_func(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, 0);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
}
else
{
@@ -659,13 +617,13 @@ pfs_rw_lock_x_lock_func_nowait(
PSI_rwlock_locker_state state;
/* Record the entry of rw x lock request in performance schema */
- locker = PSI_CALL(start_rwlock_wrwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, ret);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, ret);
}
else
{
@@ -686,7 +644,7 @@ pfs_rw_lock_free_func(
{
if (lock->pfs_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
lock->pfs_psi = NULL;
}
@@ -714,13 +672,13 @@ pfs_rw_lock_s_lock_func(
PSI_rwlock_locker_state state;
/* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_CALL(start_rwlock_rdwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
&state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
rw_lock_s_lock_func(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, 0);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
}
else
{
@@ -753,13 +711,13 @@ pfs_rw_lock_s_lock_low(
PSI_rwlock_locker_state state;
/* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_CALL(start_rwlock_rdwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
&state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
ret = rw_lock_s_lock_low(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, ret);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, ret);
}
else
{
@@ -786,7 +744,7 @@ pfs_rw_lock_x_unlock_func(
{
/* Inform performance schema we are unlocking the lock */
if (lock->pfs_psi != NULL)
- PSI_CALL(unlock_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
rw_lock_x_unlock_func(
#ifdef UNIV_SYNC_DEBUG
@@ -812,7 +770,7 @@ pfs_rw_lock_s_unlock_func(
{
/* Inform performance schema we are unlocking the lock */
if (lock->pfs_psi != NULL)
- PSI_CALL(unlock_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
rw_lock_s_unlock_func(
#ifdef UNIV_SYNC_DEBUG
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 1adcf938903..9950a6fbf6b 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -95,6 +96,7 @@ extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
+extern mysql_pfs_key_t recv_writer_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t rw_lock_debug_mutex_key;
@@ -120,9 +122,13 @@ extern mysql_pfs_key_t srv_sys_tasks_mutex_key;
#ifndef HAVE_ATOMIC_BUILTINS
extern mysql_pfs_key_t srv_conc_mutex_key;
#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+extern mysql_pfs_key_t monitor_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
extern mysql_pfs_key_t event_os_mutex_key;
extern mysql_pfs_key_t ut_list_mutex_key;
extern mysql_pfs_key_t os_mutex_key;
+extern mysql_pfs_key_t zip_pad_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
@@ -223,7 +229,7 @@ UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -242,7 +248,7 @@ UNIV_INTERN
void
mutex_free_func(
/*============*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
@@ -259,7 +265,7 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/********************************************************************//**
@@ -271,7 +277,7 @@ UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -282,7 +288,7 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
#ifdef UNIV_PFS_MUTEX
@@ -297,7 +303,7 @@ void
pfs_mutex_create_func(
/*==================*/
PSI_mutex_key key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -315,7 +321,7 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/********************************************************************//**
@@ -328,7 +334,7 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -341,7 +347,7 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
@@ -352,7 +358,7 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_PFS_MUTEX */
@@ -390,7 +396,7 @@ UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Checks that the current thread owns the mutex. Works only
in the debug version.
@@ -399,7 +405,7 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
__attribute__((warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
@@ -470,7 +476,7 @@ UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char** file_name, /*!< out: file where requested */
ulint* line, /*!< out: line where requested */
os_thread_id_t* thread_id); /*!< out: id of the thread which owns
@@ -490,7 +496,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
NOT to be used outside this module except in debugging! Gets the waiters
@@ -500,7 +506,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_SYNC_DEBUG */
/*
@@ -662,6 +668,7 @@ or row lock! */
#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
+#define SYNC_STATS_AUTO_RECALC 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912
@@ -679,14 +686,16 @@ or row lock! */
#define SYNC_EXTERN_STORAGE 500
#define SYNC_FSP 400
#define SYNC_FSP_PAGE 395
-/*------------------------------------- Insert buffer headers */
+/*------------------------------------- Change buffer headers */
#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
-/*------------------------------------- Insert buffer tree */
+/*------------------------------------- Change buffer tree */
#define SYNC_IBUF_INDEX_TREE 360
#define SYNC_IBUF_TREE_NODE_NEW 359
#define SYNC_IBUF_TREE_NODE 358
#define SYNC_IBUF_BITMAP_MUTEX 351
#define SYNC_IBUF_BITMAP 350
+/*------------------------------------- Change log for online create index */
+#define SYNC_INDEX_ONLINE_LOG 340
/*------------------------------------- MySQL query cache mutex */
/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
@@ -733,7 +742,7 @@ Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
/** InnoDB mutex */
-struct mutex_struct {
+struct ib_mutex_t {
os_event_t event; /*!< Used by sync0arr.cc for the wait queue */
volatile lock_word_t lock_word; /*!< lock_word is the target
of the atomic test-and-set instruction when
@@ -748,7 +757,7 @@ struct mutex_struct {
may be) threads waiting in the global wait
array for this mutex to be released.
Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /*!< All allocated mutexes are put into
+ UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into
a list. Pointers to the next and prev. */
#ifdef UNIV_SYNC_DEBUG
const char* file_name; /*!< File where the mutex was locked */
@@ -757,23 +766,17 @@ struct mutex_struct {
#endif /* UNIV_SYNC_DEBUG */
const char* cfile_name;/*!< File name where mutex created */
ulint cline; /*!< Line where created */
+ ulong count_os_wait; /*!< count of os_wait */
#ifdef UNIV_DEBUG
+
+/** Value of mutex_t::magic_n */
+# define MUTEX_MAGIC_N 979585UL
+
os_thread_id_t thread_id; /*!< The thread id of the thread
which locked the mutex. */
ulint magic_n; /*!< MUTEX_MAGIC_N */
-/** Value of mutex_struct::magic_n */
-# define MUTEX_MAGIC_N (ulint)979585
-#endif /* UNIV_DEBUG */
- ulong count_os_wait; /*!< count of os_wait */
-#ifdef UNIV_DEBUG
- ulong count_using; /*!< count of times mutex used */
- ulong count_spin_loop; /*!< count of spin loops */
- ulong count_spin_rounds;/*!< count of spin rounds */
- ulong count_os_yield; /*!< count of os_wait */
- ulonglong lspent_time; /*!< mutex os_wait timer msec */
- ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
const char* cmutex_name; /*!< mutex name */
- ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
+ ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_MUTEX
struct PSI_mutex* pfs_psi; /*!< The performance schema
@@ -799,12 +802,12 @@ extern ibool sync_order_checks_on;
extern ibool sync_initialized;
/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
+typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t;
/** Global list of database mutexes (not OS mutexes) created. */
extern ut_list_base_node_t mutex_list;
/** Mutex protecting the mutex_list variable */
-extern mutex_t mutex_list_mutex;
+extern ib_mutex_t mutex_list_mutex;
#ifndef HAVE_ATOMIC_BUILTINS
/**********************************************************//**
@@ -813,7 +816,7 @@ UNIV_INLINE
void
os_atomic_dec_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
decrement */
volatile ulint* var, /*!< in/out: variable to
decrement */
@@ -824,7 +827,7 @@ UNIV_INLINE
void
os_atomic_inc_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
increment */
volatile ulint* var, /*!< in/out: variable to
increment */
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
index 746e73ebee7..ad77ad6d5a4 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innobase/include/sync0sync.ic
@@ -36,7 +36,7 @@ UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
ulint n); /*!< in: value to set */
/******************************************************************//**
Reserves a mutex for the current thread. If the mutex is reserved, the
@@ -46,7 +46,7 @@ UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -57,7 +57,7 @@ UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char* file_name, /*!< in: file where requested */
ulint line); /*!< in: line where requested */
#endif /* UNIV_SYNC_DEBUG */
@@ -67,7 +67,7 @@ UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Performs an atomic test-and-set instruction to the lock_word field of a
@@ -75,9 +75,9 @@ mutex.
@return the previous value of lock_word: 0 or 1 */
UNIV_INLINE
byte
-mutex_test_and_set(
+ib_mutex_test_and_set(
/*===============*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
@@ -105,7 +105,7 @@ UNIV_INLINE
void
mutex_reset_lock_word(
/*==================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
/* In theory __sync_lock_release should be used to release the lock.
@@ -125,7 +125,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex);
@@ -139,7 +139,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
const volatile ulint* ptr; /*!< declared volatile to ensure that
the value is read from memory */
@@ -158,7 +158,7 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@@ -199,7 +199,7 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
@@ -209,9 +209,7 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
- ut_d(mutex->count_using++);
-
- if (!mutex_test_and_set(mutex)) {
+ if (!ib_mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
mutex_set_debug_info(mutex, file_name, line);
@@ -232,28 +230,28 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
- if (mutex->pfs_psi != NULL)
- {
+ if (mutex->pfs_psi != NULL) {
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
PSI_MUTEX_LOCK, file_name, line);
mutex_enter_func(mutex, file_name, line);
- if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, 0);
- }
- else
- {
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+ }
+ } else {
mutex_enter_func(mutex, file_name, line);
}
}
+
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
@@ -264,33 +262,33 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
{
- ulint ret;
+ ulint ret;
- if (mutex->pfs_psi != NULL)
- {
+ if (mutex->pfs_psi != NULL) {
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
PSI_MUTEX_TRYLOCK, file_name, line);
ret = mutex_enter_nowait_func(mutex, file_name, line);
- if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, (int) ret);
- }
- else
- {
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
+ }
+ } else {
ret = mutex_enter_nowait_func(mutex, file_name, line);
}
return(ret);
}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
@@ -300,10 +298,11 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
{
- if (mutex->pfs_psi != NULL)
- PSI_CALL(unlock_mutex)(mutex->pfs_psi);
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
+ }
mutex_exit_func(mutex);
}
@@ -319,7 +318,7 @@ void
pfs_mutex_create_func(
/*==================*/
mysql_pfs_key_t key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -329,7 +328,7 @@ pfs_mutex_create_func(
const char* cfile_name, /*!< in: file name where created */
ulint cline) /*!< in: file line where created */
{
- mutex->pfs_psi = PSI_CALL(init_mutex)(key, mutex);
+ mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
mutex_create_func(mutex,
# ifdef UNIV_DEBUG
@@ -341,6 +340,7 @@ pfs_mutex_create_func(
cfile_name,
cline);
}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
this function!
@@ -350,11 +350,10 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
- if (mutex->pfs_psi != NULL)
- {
- PSI_CALL(destroy_mutex)(mutex->pfs_psi);
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
mutex->pfs_psi = NULL;
}
@@ -370,7 +369,7 @@ UNIV_INLINE
void
os_atomic_dec_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the dec */
+ ib_mutex_t* mutex, /*!< in: mutex guarding the dec */
volatile ulint* var, /*!< in/out: variable to decrement */
ulint delta) /*!< in: delta to decrement */
{
@@ -391,7 +390,7 @@ UNIV_INLINE
void
os_atomic_inc_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the increment */
+ ib_mutex_t* mutex, /*!< in: mutex guarding the increment */
volatile ulint* var, /*!< in/out: variable to increment */
ulint delta) /*!< in: delta to increment */
{
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 679cf6a9074..0d143004a7a 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -26,9 +26,6 @@ Created 9/5/1995 Heikki Tuuri
#ifndef sync0types_h
#define sync0types_h
-/** Rename mutex_t to avoid name space collision on some systems */
-#define mutex_t ib_mutex_t
-/** InnoDB mutex */
-typedef struct mutex_struct mutex_t;
+struct ib_mutex_t;
#endif
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index c286fc4d9ae..662971a7841 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -79,25 +79,21 @@ do { \
} while (0)
/** A row of INFORMATION_SCHEMA.innodb_locks */
-typedef struct i_s_locks_row_struct i_s_locks_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_trx */
-typedef struct i_s_trx_row_struct i_s_trx_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_lock_waits */
-typedef struct i_s_lock_waits_row_struct i_s_lock_waits_row_t;
+struct i_s_locks_row_t;
/** Objects of trx_i_s_cache_t::locks_hash */
-typedef struct i_s_hash_chain_struct i_s_hash_chain_t;
+struct i_s_hash_chain_t;
/** Objects of this type are added to the hash table
trx_i_s_cache_t::locks_hash */
-struct i_s_hash_chain_struct {
+struct i_s_hash_chain_t {
i_s_locks_row_t* value; /*!< row of
INFORMATION_SCHEMA.innodb_locks*/
i_s_hash_chain_t* next; /*!< next item in the hash chain */
};
/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
-struct i_s_locks_row_struct {
+struct i_s_locks_row_t {
trx_id_t lock_trx_id; /*!< transaction identifier */
const char* lock_mode; /*!< lock mode from
lock_get_mode_str() */
@@ -128,16 +124,16 @@ struct i_s_locks_row_struct {
};
/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
-struct i_s_trx_row_struct {
+struct i_s_trx_row_t {
trx_id_t trx_id; /*!< transaction identifier */
const char* trx_state; /*!< transaction state from
trx_get_que_state_str() */
- ib_time_t trx_started; /*!< trx_struct::start_time */
+ ib_time_t trx_started; /*!< trx_t::start_time */
const i_s_locks_row_t* requested_lock_row;
/*!< pointer to a row
in innodb_locks if trx
is waiting, or NULL */
- ib_time_t trx_wait_started; /*!< trx_struct::wait_started */
+ ib_time_t trx_wait_started; /*!< trx_t::wait_started */
ullint trx_weight; /*!< TRX_WEIGHT() */
ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
const char* trx_query; /*!< MySQL statement being
@@ -145,36 +141,34 @@ struct i_s_trx_row_struct {
struct charset_info_st* trx_query_cs;
/*!< charset encode the MySQL
statement */
- const char* trx_operation_state; /*!< trx_struct::op_info */
+ const char* trx_operation_state; /*!< trx_t::op_info */
ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
- trx_struct */
+ trx_t */
ulint trx_tables_locked;
/*!< mysql_n_tables_locked in
- trx_struct */
+ trx_t */
ulint trx_lock_structs;/*!< list len of trx_locks in
- trx_struct */
+ trx_t */
ulint trx_lock_memory_bytes;
/*!< mem_heap_get_size(
trx->lock_heap) */
ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
- ullint trx_rows_modified;/*!< trx_struct::undo_no */
+ ullint trx_rows_modified;/*!< trx_t::undo_no */
ulint trx_concurrency_tickets;
/*!< n_tickets_to_enter_innodb in
- trx_struct */
+ trx_t */
const char* trx_isolation_level;
- /*!< isolation_level in trx_struct*/
+ /*!< isolation_level in trx_t */
ibool trx_unique_checks;
- /*!< check_unique_secondary in
- trx_struct*/
+					/*!< check_unique_secondary in trx_t */
ibool trx_foreign_key_checks;
- /*!< check_foreigns in trx_struct */
+ /*!< check_foreigns in trx_t */
const char* trx_foreign_key_error;
- /*!< detailed_error in trx_struct */
+ /*!< detailed_error in trx_t */
ibool trx_has_search_latch;
- /*!< has_search_latch in trx_struct */
+ /*!< has_search_latch in trx_t */
ulint trx_search_latch_timeout;
- /*!< search_latch_timeout in
- trx_struct */
+ /*!< search_latch_timeout in trx_t */
ulint trx_is_read_only;
/*!< trx_t::read_only */
ulint trx_is_autocommit_non_locking;
@@ -183,13 +177,13 @@ struct i_s_trx_row_struct {
};
/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
-struct i_s_lock_waits_row_struct {
+struct i_s_lock_waits_row_t {
const i_s_locks_row_t* requested_lock_row; /*!< requested lock */
const i_s_locks_row_t* blocking_lock_row; /*!< blocking lock */
};
/** Cache of INFORMATION_SCHEMA table data */
-typedef struct trx_i_s_cache_struct trx_i_s_cache_t;
+struct trx_i_s_cache_t;
/** Auxiliary enum used by functions that need to select one of the
INFORMATION_SCHEMA tables */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 0199083467c..1e13c883800 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -108,7 +108,8 @@ enum purge_state_t {
PURGE_STATE_INIT, /*!< Purge instance created */
PURGE_STATE_RUN, /*!< Purge should be running */
PURGE_STATE_STOP, /*!< Purge should be stopped */
- PURGE_STATE_EXIT /*!< Purge has been shutdown */
+ PURGE_STATE_EXIT, /*!< Purge has been shutdown */
+ PURGE_STATE_DISABLED /*!< Purge was never started */
};
/*******************************************************************//**
@@ -121,16 +122,16 @@ trx_purge_state(void);
/** This is the purge pointer/iterator. We need both the undo no and the
transaction no up to which purge has parsed and applied the records. */
-typedef struct purge_iter_struct {
+struct purge_iter_t {
trx_id_t trx_no; /*!< Purge has advanced past all
transactions whose number is less
than this */
undo_no_t undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */
-} purge_iter_t;
+};
/** The control structure used in the purge operation */
-struct trx_purge_struct{
+struct trx_purge_t{
sess_t* sess; /*!< System session running the purge
query */
trx_t* trx; /*!< System transaction running the
@@ -146,7 +147,8 @@ struct trx_purge_struct{
protects state and running */
os_event_t event; /*!< State signal event */
ulint n_stop; /*!< Counter to track number stops */
- bool running; /*!< true, if purge is active */
+ volatile bool running; /*!< true, if purge is active,
+ we check this without the latch too */
volatile purge_state_t state; /*!< Purge coordinator thread states,
we check this in several places
without holding the latch. */
@@ -171,6 +173,10 @@ struct trx_purge_struct{
purge_iter_t limit; /* The 'purge pointer' which advances
during a purge, and which is used in
history list truncation */
+#ifdef UNIV_DEBUG
+	purge_iter_t	done;	/* The 'purge pointer' up to which
+					purge is known to have completed. */
+#endif /* UNIV_DEBUG */
/*-----------------------------*/
ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then
@@ -196,17 +202,15 @@ struct trx_purge_struct{
ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
rseg_queue_t::trx_no. It is protected
by the bh_mutex */
- mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
+ ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
/** Info required to purge a record */
-struct trx_purge_rec_struct {
+struct trx_purge_rec_t {
trx_undo_rec_t* undo_rec; /*!< Record to purge */
roll_ptr_t roll_ptr; /*!< File pointr to UNDO record */
};
-typedef struct trx_purge_rec_struct trx_purge_rec_t;
-
#ifndef UNIV_NONINL
#include "trx0purge.ic"
#endif
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index c9fae45dad4..cd1ecc096fd 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,10 +105,11 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
- table_id_t* table_id); /*!< out: table id */
+ table_id_t* table_id) /*!< out: table id */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
@@ -178,8 +179,9 @@ trx_undo_update_rec_get_update(
needed is allocated */
upd_t** upd); /*!< out, own: update vector */
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -197,8 +199,9 @@ trx_undo_rec_get_partial_row(
ibool ignore_prefix, /*!< in: flag to indicate if we
expect blob prefixes in undo. Used
only in the assertion. */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
@@ -206,7 +209,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -225,10 +228,12 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
- roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
flag was specified */
+ __attribute__((nonnull(3,4,10), warn_unused_result));
/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
the undo log record exists.
@@ -238,16 +243,17 @@ trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record, to
-guarantee that the stack of versions is locked all the way down to the
-purge_sys->view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed */
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec,/*!< in: clustered index record in the
@@ -256,12 +262,13 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers);/*!< out, own: previous version, or NULL if
+ rec_t** old_vers)/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of adding an undo log record.
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index 847c26f03a8..08704f6b821 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -90,7 +90,7 @@ trx_undo_rec_get_offset(
/*====================*/
undo_no_t undo_no) /*!< in: undo no read from node */
{
- return (3 + mach_ull_get_much_compressed_size(undo_no));
+ return(3 + mach_ull_get_much_compressed_size(undo_no));
}
/***********************************************************************//**
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 3b724e03830..9d020a10725 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -146,29 +146,32 @@ trx_rollback_step(
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_for_mysql(
/*===================*/
- trx_t* trx); /*!< in/out: transaction */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- trx_t* trx); /*!< in/out: transaction */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
Rollback a transaction to a given savepoint or do a complete rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_to_savepoint(
/*======================*/
trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if
+ trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
partial rollback requested, or NULL for
complete rollback */
+ __attribute__((nonnull(1)));
/*******************************************************************//**
Rolls back a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
@@ -179,17 +182,18 @@ were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
+ ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
binlog entries of the queries
executed after the savepoint */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
@@ -197,28 +201,28 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos); /*!< in: MySQL binlog cache
+ ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
-
+ __attribute__((nonnull));
/*******************************************************************//**
Releases a named savepoint. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name); /*!< in: savepoint name */
-
+ const char* savepoint_name) /*!< in: savepoint name */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Frees savepoint structs starting from savep. */
UNIV_INTERN
@@ -230,8 +234,8 @@ trx_roll_savepoints_free(
if this is NULL, free all savepoints
of trx */
-/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
-struct trx_undo_inf_struct{
+/** A cell of trx_undo_arr_t; used during a rollback and a purge */
+struct trx_undo_inf_t{
ibool in_use; /*!< true if cell is being used */
trx_id_t trx_no; /*!< transaction number: not defined during
a rollback */
@@ -241,7 +245,7 @@ struct trx_undo_inf_struct{
/** During a rollback and a purge, undo numbers of undo records currently being
processed are stored in this array */
-struct trx_undo_arr_struct{
+struct trx_undo_arr_t{
ulint n_cells; /*!< number of cells in the array */
ulint n_used; /*!< number of cells in use */
trx_undo_inf_t* infos; /*!< the array of undo infos */
@@ -258,7 +262,7 @@ enum roll_node_state {
};
/** Rollback command node in a query graph */
-struct roll_node_struct{
+struct roll_node_t{
que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
enum roll_node_state state; /*!< node execution state */
ibool partial;/*!< TRUE if we want a partial
@@ -270,7 +274,7 @@ struct roll_node_struct{
};
/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
+struct trx_named_savept_t{
char* name; /*!< savepoint name */
trx_savept_t savept; /*!< the undo number corresponding to
the savepoint */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 66e5449cf57..185b05876b4 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -151,11 +151,11 @@ trx_rseg_get_n_undo_tablespaces(
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
/* The rollback segment memory object */
-struct trx_rseg_struct{
+struct trx_rseg_t{
/*--------------------------------------------------------*/
ulint id; /*!< rollback segment id == the index of
its slot in the trx system file copy */
- mutex_t mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t mutex; /*!< mutex protecting the fields in this
struct except id, which is constant */
ulint space; /*!< space where the rollback segment is
header is placed */
@@ -192,13 +192,11 @@ struct trx_rseg_struct{
};
/** For prioritising the rollback segments for purge. */
-struct rseg_queue_struct {
+struct rseg_queue_t {
trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
trx_rseg_t* rseg; /*!< Rollback segment */
};
-typedef struct rseg_queue_struct rseg_queue_t;
-
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index b1aa3d2224c..70f214d1ac7 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -426,7 +426,7 @@ trx_sys_file_format_max_get(void);
Check for the max file format tag stored on disk.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_sys_file_format_max_check(
/*==========================*/
ulint max_format_id); /*!< in: the max format id to check */
@@ -600,18 +600,28 @@ identifier is added to this 64-bit constant. */
#ifndef UNIV_HOTBACKUP
/** The transaction system central memory data structure. */
-struct trx_sys_struct{
+struct trx_sys_t{
- mutex_t mutex; /*!< mutex protecting most fields in
+ ib_mutex_t mutex; /*!< mutex protecting most fields in
this structure except when noted
otherwise */
- ulint n_mysql_trx; /*!< Number of transactions currently
- allocated for MySQL */
ulint n_prepared_trx; /*!< Number of transactions currently
in the XA PREPARED state */
+ ulint n_prepared_recovered_trx; /*!< Number of transactions
+ currently in XA PREPARED state that are
+ also recovered. Such transactions cannot
+ be added during runtime. They can only
+ occur after recovery if mysqld crashed
+ while there were XA PREPARED
+ transactions. We disable query cache
+ if such transactions exist. */
trx_id_t max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number */
+#ifdef UNIV_DEBUG
+ trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
+ which exist or existed */
+#endif
trx_list_t rw_trx_list; /*!< List of active and committed in
memory read-write transactions, sorted
on trx id, biggest first. Recovered
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 3e6cfc7d0da..bb84c1806f2 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -121,20 +121,69 @@ UNIV_INTERN
void
trx_lists_init_at_db_start(void);
/*============================*/
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started_xa(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_xa_low((t)); \
+ }
+#else
+#define trx_start_if_not_started_xa(t) \
+ trx_start_if_not_started_xa_low((t))
+#endif /* UNIV_DEBUG */
+
/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started_xa(
-/*========================*/
+trx_start_if_not_started_xa_low(
+/*============================*/
trx_t* trx); /*!< in: transaction */
/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started(
-/*=====================*/
+trx_start_if_not_started_low(
+/*=========================*/
trx_t* trx); /*!< in: transaction */
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_low((t)); \
+ }
+#else
+#define trx_start_if_not_started(t) \
+ trx_start_if_not_started_low((t))
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_dict_op_t op) /*!< in: dictionary operation type */
+ __attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+#define trx_start_for_ddl(t, o) \
+ { \
+ ut_ad((t)->start_file == 0); \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_for_ddl_low((t), (o)); \
+ }
+#else
+#define trx_start_for_ddl(t, o) \
+ trx_start_for_ddl_low((t), (o))
+#endif /* UNIV_DEBUG */
+
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
@@ -155,7 +204,7 @@ trx_cleanup_at_db_startup(
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
trx_commit_for_mysql(
/*=================*/
trx_t* trx); /*!< in/out: transaction */
@@ -189,13 +238,13 @@ trx_get_trx_by_xid(
const XID* xid); /*!< in: X/Open XA transaction identifier */
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
+with trx->flush_log_later == TRUE. */
UNIV_INTERN
-ulint
+void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx); /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Marks the latest SQL statement ended. */
UNIV_INTERN
@@ -251,9 +300,9 @@ trx_print_low(
ulint max_query_len,
/*!< in: max query length to print,
or 0 to use the default max length */
- ulint n_lock_rec,
+ ulint n_rec_locks,
/*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_lock_struct,
+ ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
ulint heap_size)
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
@@ -286,26 +335,11 @@ trx_print(
or 0 to use the default max length */
__attribute__((nonnull));
-/** Type of data dictionary operation */
-typedef enum trx_dict_op {
- /** The transaction is not modifying the data dictionary. */
- TRX_DICT_OP_NONE = 0,
- /** The transaction is creating a table or an index, or
- dropping a table. The table must be dropped in crash
- recovery. This and TRX_DICT_OP_NONE are the only possible
- operation modes in crash recovery. */
- TRX_DICT_OP_TABLE = 1,
- /** The transaction is creating or dropping an index in an
- existing table. In crash recovery, the data dictionary
- must be locked, but the table must not be dropped. */
- TRX_DICT_OP_INDEX = 2
-} trx_dict_op_t;
-
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
@@ -317,7 +351,7 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op); /*!< in: operation, not
+ enum trx_dict_op_t op); /*!< in: operation, not
TRX_DICT_OP_NONE */
#ifndef UNIV_HOTBACKUP
@@ -359,7 +393,7 @@ UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- trx_t* trx); /*!< in: transaction */
+ const trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Determines if the currently running transaction is in strict mode.
@return TRUE if strict */
@@ -405,6 +439,15 @@ trx_get_que_state_str(
/*==================*/
const trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx); /*!< A read-only transaction that
+					needs to be assigned an RBS. */
/*******************************************************************//**
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
@@ -450,7 +493,6 @@ non-locking select */
ut_ad(!trx_is_autocommit_non_locking((t))); \
switch ((t)->state) { \
case TRX_STATE_PREPARED: \
- ut_a(!(t)->read_only); \
/* fall through */ \
case TRX_STATE_ACTIVE: \
case TRX_STATE_COMMITTED_IN_MEMORY: \
@@ -463,7 +505,7 @@ non-locking select */
#ifdef UNIV_DEBUG
/*******************************************************************//**
-Assert that an autocommit non-locking slect cannot be in the
+Assert that an autocommit non-locking select cannot be in the
ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
The tranasction must be in the mysql_trx_list. */
# define assert_trx_nonlocking_or_in_list(t) \
@@ -511,7 +553,7 @@ code and no mutex is required when the query thread is no longer waiting. */
/** The locks and state of an active transaction. Protected by
lock_sys->mutex, trx->mutex or both. */
-struct trx_lock_struct {
+struct trx_lock_t {
ulint n_active_thrs; /*!< number of active query threads */
trx_que_t que_state; /*!< valid when trx->state
@@ -620,10 +662,10 @@ lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
lock_sys->mutex and sometimes by trx->mutex. */
-struct trx_struct{
+struct trx_t{
ulint magic_n;
- mutex_t mutex; /*!< Mutex protecting the fields
+ ib_mutex_t mutex; /*!< Mutex protecting the fields
state and lock
(except some fields of lock, which
are protected by lock_sys->mutex) */
@@ -657,8 +699,7 @@ struct trx_struct{
Latching and various transaction lists membership rules:
- XA (2PC) transactions are always treated as read-write and
- non-autocommit.
+ XA (2PC) transactions are always treated as non-autocommit.
Transitions to ACTIVE or NOT_STARTED occur when
!in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
@@ -793,9 +834,9 @@ struct trx_struct{
transaction branch */
lsn_t commit_lsn; /*!< lsn at the time of the commit */
table_id_t table_id; /*!< Table to drop iff dict_operation
- is TRUE, or 0. */
+ == TRX_DICT_OP_TABLE, or 0. */
/*------------------------------*/
- void* mysql_thd; /*!< MySQL thread handle corresponding
+ THD* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
const char* mysql_log_file_name;
/*!< if MySQL binlog is used, this field
@@ -838,7 +879,7 @@ struct trx_struct{
trx_sys->mysql_trx_list */
#endif /* UNIV_DEBUG */
/*------------------------------*/
- enum db_err error_state; /*!< 0 if no error, otherwise error
+ dberr_t error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
doing the transaction is allowed to
set this field: this is NOT protected
@@ -873,7 +914,7 @@ struct trx_struct{
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- mutex_t undo_mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
@@ -929,12 +970,24 @@ struct trx_struct{
ulint will_lock; /*!< Will acquire some locks. Increment
each time we determine that a lock will
be acquired by the MySQL layer. */
+ bool ddl; /*!< true if it is a transaction that
+ is being started for a DDL operation */
/*------------------------------*/
- fts_trx_t* fts_trx; /* FTS information, or NULL if
+ fts_trx_t* fts_trx; /*!< FTS information, or NULL if
transaction hasn't modified tables
with FTS indexes (yet). */
doc_id_t fts_next_doc_id;/* The document id used for updates */
/*------------------------------*/
+ ulint flush_tables; /*!< if "covering" the FLUSH TABLES",
+ count of tables being flushed. */
+
+ /*------------------------------*/
+#ifdef UNIV_DEBUG
+ ulint start_line; /*!< Track where it was started from */
+ const char* start_file; /*!< Filename where it was started */
+#endif /* UNIV_DEBUG */
+
+ /*------------------------------*/
char detailed_error[256]; /*!< detailed error message for last
error, or empty. */
};
@@ -1003,7 +1056,7 @@ enum commit_node_state {
};
/** Commit command node in a query graph */
-struct commit_node_struct{
+struct commit_node_t{
que_common_t common; /*!< node type: QUE_NODE_COMMIT */
enum commit_node_state
state; /*!< node execution state */
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index ceeb121ab70..69ee17ea98b 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -44,7 +44,7 @@ trx_state_eq(
#ifdef UNIV_DEBUG
switch (trx->state) {
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
return(trx->state == state);
case TRX_STATE_ACTIVE:
@@ -108,12 +108,12 @@ trx_get_que_state_str(
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
{
- enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
+ trx_dict_op_t op = static_cast<trx_dict_op_t>(trx->dict_operation);
#ifdef UNIV_DEBUG
switch (op) {
@@ -124,7 +124,7 @@ trx_get_dict_operation(
}
ut_error;
#endif /* UNIV_DEBUG */
- return((enum trx_dict_op) op);
+ return(op);
}
/**********************************************************************//**
Flag a transaction a dictionary operation. */
@@ -133,11 +133,11 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op) /*!< in: operation, not
+ enum trx_dict_op_t op) /*!< in: operation, not
TRX_DICT_OP_NONE */
{
#ifdef UNIV_DEBUG
- enum trx_dict_op old_op = trx_get_dict_operation(trx);
+ enum trx_dict_op_t old_op = trx_get_dict_operation(trx);
switch (op) {
case TRX_DICT_OP_NONE:
@@ -159,6 +159,7 @@ trx_set_dict_operation(
ok:
#endif /* UNIV_DEBUG */
+ trx->ddl = true;
trx->dict_operation = op;
}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 650d5878e64..4f515cb5248 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -36,7 +36,7 @@ the terminating NUL character. */
#define TRX_ID_MAX_LEN 17
/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
-enum trx_que_enum {
+enum trx_que_t {
TRX_QUE_RUNNING, /*!< transaction is running */
TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for
a lock */
@@ -45,43 +45,54 @@ enum trx_que_enum {
};
/** Transaction states (trx_t::state) */
-enum trx_state_enum {
+enum trx_state_t {
TRX_STATE_NOT_STARTED,
TRX_STATE_ACTIVE,
TRX_STATE_PREPARED, /* Support for 2PC/XA */
TRX_STATE_COMMITTED_IN_MEMORY
};
+/** Type of data dictionary operation */
+enum trx_dict_op_t {
+ /** The transaction is not modifying the data dictionary. */
+ TRX_DICT_OP_NONE = 0,
+ /** The transaction is creating a table or an index, or
+ dropping a table. The table must be dropped in crash
+ recovery. This and TRX_DICT_OP_NONE are the only possible
+ operation modes in crash recovery. */
+ TRX_DICT_OP_TABLE = 1,
+ /** The transaction is creating or dropping an index in an
+ existing table. In crash recovery, the data dictionary
+ must be locked, but the table must not be dropped. */
+ TRX_DICT_OP_INDEX = 2
+};
+
/** Memory objects */
/* @{ */
/** Transaction */
-typedef struct trx_struct trx_t;
+struct trx_t;
/** The locks and state of an active transaction */
-typedef struct trx_lock_struct trx_lock_t;
+struct trx_lock_t;
/** Transaction system */
-typedef struct trx_sys_struct trx_sys_t;
+struct trx_sys_t;
/** Signal */
-typedef struct trx_sig_struct trx_sig_t;
+struct trx_sig_t;
/** Rollback segment */
-typedef struct trx_rseg_struct trx_rseg_t;
+struct trx_rseg_t;
/** Transaction undo log */
-typedef struct trx_undo_struct trx_undo_t;
+struct trx_undo_t;
/** Array of undo numbers of undo records being rolled back or purged */
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
+struct trx_undo_arr_t;
/** A cell of trx_undo_arr_t */
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
+struct trx_undo_inf_t;
/** The control structure used in the purge operation */
-typedef struct trx_purge_struct trx_purge_t;
+struct trx_purge_t;
/** Rollback command node in a query graph */
-typedef struct roll_node_struct roll_node_t;
+struct roll_node_t;
/** Commit command node in a query graph */
-typedef struct commit_node_struct commit_node_t;
+struct commit_node_t;
/** SAVEPOINT command node in a query graph */
-typedef struct trx_named_savept_struct trx_named_savept_t;
-/** Transaction concurrency state */
-typedef enum trx_state_enum trx_state_t;
-/** Transaction query thread state */
-typedef enum trx_que_enum trx_que_t;
+struct trx_named_savept_t;
/* @} */
/** Rollback contexts */
@@ -109,9 +120,7 @@ typedef ib_id_t roll_ptr_t;
typedef ib_id_t undo_no_t;
/** Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-/** Transaction savepoint */
-struct trx_savept_struct{
+struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
};
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index ed2ce66bbb6..4021d71c68a 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -65,6 +65,15 @@ ibool
trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr); /*!< in: roll pointer */
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
@@ -285,11 +294,12 @@ undo log reused.
are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
- ulint type); /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@return undo log segment header page, x-latched */
@@ -404,7 +414,7 @@ trx_undo_mem_free(
/** Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
-struct trx_undo_struct{
+struct trx_undo_t{
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 4b38e63297c..577759d6c3d 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -101,6 +101,21 @@ trx_undo_roll_ptr_is_insert(
ut_ad(roll_ptr < (1ULL << 56));
return((ibool) (roll_ptr >> 55));
}
+
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+{
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error
+#endif
+ return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+}
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 60eb1fede91..fbb62e8de01 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -380,11 +380,16 @@ This number varies depending on UNIV_PAGE_SIZE. */
/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
-/** The maximum length of a table name. This is the MySQL limit and is
-defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the
-number does not include a terminating '\0'. InnoDB probably can handle
-longer names internally */
-#define MAX_TABLE_NAME_LEN 192
+/** This is the "mbmaxlen" for my_charset_filename (defined in
+strings/ctype-utf8.c), which is used to encode File and Database names. */
+#define FILENAME_CHARSET_MAXNAMLEN 5
+
+/** The maximum length of an encoded table name in bytes. The max
+table and database names are NAME_CHAR_LEN (64) characters. After the
+encoding, the max length would be NAME_CHAR_LEN (64) *
+FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a
+terminating '\0'. InnoDB can handle longer names internally */
+#define MAX_TABLE_NAME_LEN 320
/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
the MySQL's NAME_LEN, see check_and_convert_db_name(). */
@@ -398,6 +403,16 @@ database name and table name. In addition, 14 bytes is added for:
#define MAX_FULL_NAME_LEN \
(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+/** The maximum length in bytes that a database name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_DB_UTF8_LEN (NAME_LEN + 1)
+
+/** The maximum length in bytes that a table name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_TABLE_UTF8_LEN (NAME_LEN + sizeof(srv_mysql50_table_name_prefix))
+
/*
UNIVERSAL TYPE DEFINITIONS
==========================
@@ -417,6 +432,7 @@ macro ULINTPF. */
# define UINT32PF "%I32u"
# define INT64PF "%I64d"
# define UINT64PF "%I64u"
+# define UINT64PFx "%016I64u"
typedef __int64 ib_int64_t;
typedef unsigned __int64 ib_uint64_t;
typedef unsigned __int32 ib_uint32_t;
@@ -425,6 +441,7 @@ typedef unsigned __int32 ib_uint32_t;
# define UINT32PF "%"PRIu32
# define INT64PF "%"PRId64
# define UINT64PF "%"PRIu64
+# define UINT64PFx "%016"PRIx64
typedef int64_t ib_int64_t;
typedef uint64_t ib_uint64_t;
typedef uint32_t ib_uint32_t;
@@ -489,6 +506,8 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#endif
+#define UNIV_NOTHROW
+
/** The following number as the length of a logical field means that the field
has the SQL NULL as its value. NOTE that because we assume that the length
of a field is a 32-bit integer when we store it, for example, to an undo log
@@ -588,15 +607,23 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr)
# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
-# define UNIV_MEM_ASSERT_RW(addr, size) do { \
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
+ if (UNIV_LIKELY_NULL(_p)) { \
fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
__FILE__, __LINE__, \
(const void*) (addr), (unsigned) (size), (long) \
(((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
+ if (should_abort) { \
+ ut_error; \
+ } \
+ } \
+} while (0)
+# define UNIV_MEM_ASSERT_RW(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, false)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, true)
# define UNIV_MEM_ASSERT_W(addr, size) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
@@ -613,7 +640,9 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
# define UNIV_MEM_DESC(addr, size) do {} while(0)
# define UNIV_MEM_UNDESC(b) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0)
# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
#endif
#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
index 4a0710c5060..b5c80b97b43 100644
--- a/storage/innobase/include/usr0sess.h
+++ b/storage/innobase/include/usr0sess.h
@@ -53,7 +53,7 @@ sess_close(
/* The session handle. This data structure is only used by purge and is
not really necessary. We should get rid of it. */
-struct sess_struct{
+struct sess_t{
ulint state; /*!< state of the session */
trx_t* trx; /*!< transaction object permanently
assigned for the session: the
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
index 403ad0223a8..6ba937cacc8 100644
--- a/storage/innobase/include/usr0types.h
+++ b/storage/innobase/include/usr0types.h
@@ -26,6 +26,6 @@ Created 6/25/1996 Heikki Tuuri
#ifndef usr0types_h
#define usr0types_h
-typedef struct sess_struct sess_t;
+struct sess_t;
#endif
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
index 4c029e256a9..84ea6dd915a 100644
--- a/storage/innobase/include/ut0bh.h
+++ b/storage/innobase/include/ut0bh.h
@@ -31,7 +31,7 @@ Created 2010-05-28 by Sunny Bains
/** Comparison function for objects in the binary heap. */
typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-typedef struct ib_bh_struct ib_bh_t;
+struct ib_bh_t;
/**********************************************************************//**
Get the number of elements in the binary heap.
@@ -138,7 +138,7 @@ ib_bh_pop(
ib_bh_t* ib_bh); /*!< in/out: instance */
/** Binary heap data structure */
-struct ib_bh_struct {
+struct ib_bh_t {
ulint max_elems; /*!< max elements allowed */
ulint n_elems; /*!< current size */
ulint sizeof_elem; /*!< sizeof element */
diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h
new file mode 100644
index 00000000000..fe0f36dfff2
--- /dev/null
+++ b/storage/innobase/include/ut0counter.h
@@ -0,0 +1,203 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0counter.h
+
+Counter utility class
+
+Created 2012/04/12 by Sunny Bains
+*******************************************************/
+
+#ifndef UT0COUNTER_H
+#define UT0COUNTER_H
+
+#include "univ.i"
+#include <string.h>
+#include "os0thread.h"
+
+/** CPU cache line size */
+#define CACHE_LINE_SIZE 64
+
+/** Default number of slots to use in ib_counter_t */
+#define IB_N_SLOTS 64
+
+/** Get the offset into the counter array. */
+template <typename Type, int N>
+struct generic_indexer_t {
+ /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
+ }
+};
+
+#ifdef HAVE_SCHED_GETCPU
+#include <utmpx.h>
+/** Use the cpu id to index into the counter array. If it fails then
+use the thread id. */
+template <typename Type, int N>
+struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
+ /** Default constructor/destructor should be OK. */
+
+ /* @return result from sched_getcpu(), the thread id if it fails. */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+
+ size_t cpu = sched_getcpu();
+ if (cpu == -1) {
+ cpu = (lint) os_thread_get_curr_id();
+ }
+
+ return(cpu);
+ }
+};
+#endif /* HAVE_SCHED_GETCPU */
+
+/** Use the thread id to index into the counter array. */
+template <typename Type, int N>
+struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
+	/** Default constructor/destructor should be OK. */
+
+ /* @return a random number, currently we use the thread id. Where
+ thread id is represented as a pointer, it may not work as
+ effectively. */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ return((lint) os_thread_get_curr_id());
+ }
+};
+
+/** For counters where N=1 */
+template <typename Type, int N=1>
+struct single_indexer_t {
+	/** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return((CACHE_LINE_SIZE / sizeof(Type)));
+ }
+
+ /* @return 1 */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return(1);
+ }
+};
+
+/** Class for using fuzzy counters. The counter is not protected by any
+mutex and the results are not guaranteed to be 100% accurate but close
+enough. Creates an array of counters and separates each element by the
+CACHE_LINE_SIZE bytes */
+template <
+ typename Type,
+ int N = IB_N_SLOTS,
+ template<typename, int> class Indexer = thread_id_indexer_t>
+class ib_counter_t {
+public:
+ ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
+
+ ~ib_counter_t()
+ {
+ ut_ad(validate());
+ }
+
+ bool validate() UNIV_NOTHROW {
+#ifdef UNIV_DEBUG
+ size_t n = (CACHE_LINE_SIZE / sizeof(Type));
+
+ /* Check that we aren't writing outside our defined bounds. */
+ for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) {
+ for (size_t j = 1; j < n - 1; ++j) {
+ ut_ad(m_counter[i + j] == 0);
+ }
+ }
+#endif /* UNIV_DEBUG */
+ return(true);
+ }
+
+ /** If you can't use a good index id. Increment by 1. */
+ void inc() UNIV_NOTHROW { add(1); }
+
+ /** If you can't use a good index id.
+ * @param n - is the amount to increment */
+ void add(Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+	/** Use this if you can use a unique identifier, saves a
+ call to get_rnd_index().
+ @param i - index into a slot
+ @param n - amount to increment */
+ void add(size_t index, Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(index);
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+ /** If you can't use a good index id. Decrement by 1. */
+ void dec() UNIV_NOTHROW { sub(1); }
+
+ /** If you can't use a good index id.
+ * @param - n is the amount to decrement */
+ void sub(Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+	/** Use this if you can use a unique identifier, saves a
+ call to get_rnd_index().
+ @param i - index into a slot
+ @param n - amount to decrement */
+ void sub(size_t index, Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(index);
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+ /* @return total value - not 100% accurate, since it is not atomic. */
+ operator Type() const UNIV_NOTHROW {
+ Type total = 0;
+
+ for (size_t i = 0; i < N; ++i) {
+ total += m_counter[m_policy.offset(i)];
+ }
+
+ return(total);
+ }
+
+private:
+ /** Indexer into the array */
+ Indexer<Type, N>m_policy;
+
+ /** Slot 0 is unused. */
+ Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
+};
+
+#endif /* UT0COUNTER_H */
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 456648001aa..86217692764 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -45,4 +45,7 @@ or 0x1EDC6F41 without the high-order bit) */
typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
extern ib_ut_crc32_t ut_crc32;
+
+extern bool ut_crc32_sse2_enabled;
+
#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index e9ad62fb81b..0f2da165da7 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -145,10 +145,10 @@ ut_dbg_stop_thread(
#include <sys/resource.h>
/** structure used for recording usage statistics */
-typedef struct speedo_struct {
+struct speedo_t {
struct rusage ru; /*!< getrusage() result */
struct timeval tv; /*!< gettimeofday() result */
-} speedo_t;
+};
/*******************************************************************//**
Resets a speedo (records the current time in it). */
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index 57d6bdc33a6..29fc8669ce4 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -48,9 +48,8 @@ automatically freeing the list node when the item's heap is freed.
#include "mem0mem.h"
-typedef struct ib_list_struct ib_list_t;
-typedef struct ib_list_node_struct ib_list_node_t;
-typedef struct ib_list_helper_struct ib_list_helper_t;
+struct ib_list_t;
+struct ib_list_node_t;
/****************************************************************//**
Create a new list using mem_alloc. Lists created with this function must be
@@ -152,7 +151,7 @@ ib_list_is_empty(
const ib_list_t* list); /* in: list */
/* List. */
-struct ib_list_struct {
+struct ib_list_t {
ib_list_node_t* first; /*!< first node */
ib_list_node_t* last; /*!< last node */
ibool is_heap_list; /*!< TRUE if this list was
@@ -160,7 +159,7 @@ struct ib_list_struct {
};
/* A list node. */
-struct ib_list_node_struct {
+struct ib_list_node_t {
ib_list_node_t* prev; /*!< previous node */
ib_list_node_t* next; /*!< next node */
void* data; /*!< user data */
@@ -169,7 +168,7 @@ struct ib_list_node_struct {
/* Quite often, the only additional piece of data you need is the per-item
memory heap, so we have this generic struct available to use in those
cases. */
-struct ib_list_helper_struct {
+struct ib_list_helper_t {
mem_heap_t* heap; /*!< memory heap */
void* data; /*!< user data */
};
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 51c89f15a77..b53e7ade4c1 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -65,8 +65,7 @@ The name of the field in the node struct should be the name given
to the list.
@param TYPE the list node type name */
/* Example:
-typedef struct LRU_node_struct LRU_node_t;
-struct LRU_node_struct {
+struct LRU_node_t {
UT_LIST_NODE_T(LRU_node_t) LRU_list;
...
}
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
index e8a4430e76b..e0593e99bde 100644
--- a/storage/innobase/include/ut0rbt.h
+++ b/storage/innobase/include/ut0rbt.h
@@ -44,25 +44,19 @@ Created 2007-03-20 Sunny Bains
#define FALSE 0
#endif
-/* Red black tree typedefs */
-typedef struct ib_rbt_struct ib_rbt_t;
-typedef struct ib_rbt_node_struct ib_rbt_node_t;
-/* FIXME: Iterator is a better name than _bound_ */
-typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
+struct ib_rbt_node_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2);
/** Red black tree color types */
-enum ib_rbt_color_enum {
+enum ib_rbt_color_t {
IB_RBT_RED,
IB_RBT_BLACK
};
-typedef enum ib_rbt_color_enum ib_rbt_color_t;
-
/** Red black tree node */
-struct ib_rbt_node_struct {
+struct ib_rbt_node_t {
ib_rbt_color_t color; /* color of this node */
ib_rbt_node_t* left; /* points left child */
@@ -73,7 +67,7 @@ struct ib_rbt_node_struct {
};
/** Red black tree instance.*/
-struct ib_rbt_struct {
+struct ib_rbt_t {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
pre-allocated too.*/
@@ -89,12 +83,12 @@ struct ib_rbt_struct {
compare_with_arg; /* Fn. to use for comparison
with argument */
ulint sizeof_value; /* Sizeof the item in bytes */
- const void* cmp_arg; /* Compare func argument */
+ void* cmp_arg; /* Compare func argument */
};
/** The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist.*/
-struct ib_rbt_bound_struct {
+struct ib_rbt_bound_t {
const ib_rbt_node_t*
last; /* Last node visited */
@@ -142,7 +136,7 @@ rbt_create_arg_cmp(
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_arg_compare
compare, /*!< in: comparator */
- const void* cmp_arg); /*!< in: compare fn arg */
+ void* cmp_arg); /*!< in: compare fn arg */
/**********************************************************************//**
Delete a node from the red black tree, identified by key */
UNIV_INTERN
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 35b8a580e68..1260e0381bf 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -65,16 +65,16 @@ typedef time_t ib_time_t;
# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-# elif defined(HAVE_ATOMIC_BUILTINS)
-# define UT_RELAX_CPU() do { \
- volatile lint volatile_var; \
- os_compare_and_swap_lint(&volatile_var, 0, 1); \
- } while (0)
# elif defined(HAVE_WINDOWS_ATOMICS)
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
independent way by using YieldProcessor. */
# define UT_RELAX_CPU() YieldProcessor()
+# elif defined(HAVE_ATOMIC_BUILTINS)
+# define UT_RELAX_CPU() do { \
+ volatile lint volatile_var; \
+ os_compare_and_swap_lint(&volatile_var, 0, 1); \
+ } while (0)
# else
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
# endif
@@ -345,7 +345,7 @@ ut_print_filename(
#ifndef UNIV_HOTBACKUP
/* Forward declaration of transaction handle */
-struct trx_struct;
+struct trx_t;
/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
@@ -357,7 +357,7 @@ void
ut_print_name(
/*==========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name); /*!< in: name to print */
@@ -372,13 +372,31 @@ void
ut_print_namel(
/*===========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction (NULL=no quotes) */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name, /*!< in: name to print */
ulint namelen);/*!< in: length of name */
/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+ const char* name, /*!< in: table or index name, must be
+ '\0'-terminated */
+ ibool is_table, /*!< in: if TRUE then 'name' is a table
+ name */
+ char* formatted, /*!< out: formatted result, will be
+ '\0'-terminated */
+ ulint formatted_size);/*!< out: no more than this number of
+ bytes will be written to 'formatted' */
+
+/**********************************************************************//**
Catenate files. */
UNIV_INTERN
void
@@ -442,7 +460,7 @@ UNIV_INTERN
const char*
ut_strerr(
/*======*/
- enum db_err num); /*!< in: error number */
+ dberr_t num); /*!< in: error number */
/****************************************************************
Sort function for ulint arrays. */
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index f2a5aba8116..432fb348a09 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -29,8 +29,8 @@ Created 4/6/2006 Osku Salerma
#include "univ.i"
#include "mem0mem.h"
-typedef struct ib_alloc_struct ib_alloc_t;
-typedef struct ib_vector_struct ib_vector_t;
+struct ib_alloc_t;
+struct ib_vector_t;
typedef void* (*ib_mem_alloc_t)(
/* out: Pointer to allocated memory */
@@ -64,7 +64,7 @@ freeing it when done with the vector.
/********************************************************************
Create a new vector with the given initial size. */
-
+UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
@@ -124,7 +124,7 @@ ib_vector_size(
/********************************************************************
Increase the size of the vector. */
-
+UNIV_INTERN
void
ib_vector_resize(
/*=============*/
@@ -311,7 +311,7 @@ ib_ut_allocator_free(
ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */
/* Allocator used by ib_vector_t. */
-struct ib_alloc_struct {
+struct ib_alloc_t {
ib_mem_alloc_t mem_malloc; /* For allocating memory */
ib_mem_free_t mem_release; /* For freeing memory */
ib_mem_resize_t mem_resize; /* For resizing memory */
@@ -320,7 +320,7 @@ struct ib_alloc_struct {
};
/* See comment at beginning of file. */
-struct ib_vector_struct {
+struct ib_vector_t {
ib_alloc_t* allocator; /* Allocator, because one size
doesn't fit all */
void* data; /* data elements */
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
index 1255caee2d9..f41a85e1d1d 100644
--- a/storage/innobase/include/ut0vec.ic
+++ b/storage/innobase/include/ut0vec.ic
@@ -346,9 +346,10 @@ ib_vector_remove(
ib_vector_t* vec, /*!< in: vector */
const void* elem) /*!< in: value to remove */
{
- void* current;
+ void* current = NULL;
void* next;
ulint i;
+ ulint old_used_count = vec->used;
for (i = 0; i < vec->used; i++) {
current = ib_vector_get(vec, i);
@@ -359,14 +360,14 @@ ib_vector_remove(
}
next = ib_vector_get(vec, i + 1);
- memcpy(current, next, vec->sizeof_value
- * (vec->used - i - 1));
+ memmove(current, next, vec->sizeof_value
+ * (vec->used - i - 1));
+ --vec->used;
+ break;
}
}
- --vec->used;
-
- return(current);
+ return((old_used_count != vec->used) ? current : NULL);
}
/********************************************************************
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index ed4e65e4dc6..33385ddf2d4 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -37,7 +37,7 @@ processing.
#include "os0sync.h"
#include "sync0types.h"
-typedef struct ib_wqueue_struct ib_wqueue_t;
+struct ib_wqueue_t;
/****************************************************************//**
Create a new work queue.
@@ -96,8 +96,8 @@ ib_wqueue_timedwait(
ib_time_t wait_in_usecs); /* in: wait time in micro seconds */
/* Work queue. */
-struct ib_wqueue_struct {
- mutex_t mutex; /*!< mutex protecting everything */
+struct ib_wqueue_t {
+ ib_mutex_t mutex; /*!< mutex protecting everything */
ib_list_t* items; /*!< work item list */
os_event_t event; /*!< event we use to signal additions to list */
};
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 476b305ca70..1152152cc77 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -37,15 +37,17 @@ Created 5/7/1996 Heikki Tuuri
#include "usr0sess.h"
#include "trx0purge.h"
#include "dict0mem.h"
+#include "dict0boot.h"
#include "trx0sys.h"
#include "pars0pars.h" /* pars_complete_graph_for_exec() */
#include "que0que.h" /* que_node_get_parent() */
#include "row0mysql.h" /* row_mysql_handle_errors() */
-#include "row0sel.h" /* sel_node_create(), sel_node_struct */
+#include "row0sel.h" /* sel_node_create(), sel_node_t */
#include "row0types.h" /* sel_node_t */
#include "srv0mon.h"
#include "ut0vec.h"
#include "btr0btr.h"
+#include "dict0boot.h"
/* Restricts the length of search we will do in the waits-for
graph of transactions */
@@ -345,10 +347,7 @@ static const byte lock_strength_matrix[5][5] = {
};
/** Deadlock check context. */
-typedef struct lock_deadlock_ctx_struct lock_deadlock_ctx_t;
-
-/** Deadlock check context. */
-struct lock_deadlock_ctx_struct {
+struct lock_deadlock_ctx_t {
const trx_t* start; /*!< Joining transaction that is
requesting a lock in an incompatible
mode */
@@ -366,10 +365,8 @@ struct lock_deadlock_ctx_struct {
was aborted */
};
-typedef struct lock_stack_struct lock_stack_t;
-
/** DFS visited node information used during deadlock checking. */
-struct lock_stack_struct {
+struct lock_stack_t {
const lock_t* lock; /*!< Current lock */
const lock_t* wait_lock; /*!< Waiting for lock */
unsigned heap_no:16; /*!< heap number if rec lock */
@@ -415,9 +412,10 @@ lock_rec_validate_page(
/* The lock system */
UNIV_INTERN lock_sys_t* lock_sys = NULL;
-/* We store info on the latest deadlock error to this buffer. InnoDB
+/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
UNIV_INTERN ibool lock_deadlock_found = FALSE;
+/** Only created if !srv_read_only_mode */
static FILE* lock_latest_err_file;
/********************************************************************//**
@@ -502,7 +500,7 @@ lock_check_trx_id_sanity(
dict_index_t* index, /*!< in: index */
const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
{
- ibool is_ok;
+ bool is_ok;
trx_id_t max_trx_id;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -520,10 +518,10 @@ lock_check_trx_id_sanity(
/*********************************************************************//**
Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
+@return true if sees, or false if an earlier version of the record
should be retrieved */
UNIV_INTERN
-ibool
+bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
@@ -550,14 +548,14 @@ lock_clust_rec_cons_read_sees(
Checks that a non-clustered index record is seen in a consistent read.
NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
+its modifications that also in the case false, the present version of
rec may be the right, but we must check this from the clustered index
record.
-@return TRUE if certainly sees, or FALSE if an earlier version of the
+@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
UNIV_INTERN
-ulint
+bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
@@ -574,7 +572,7 @@ lock_sec_rec_cons_read_sees(
if (recv_recovery_is_on()) {
- return(FALSE);
+ return(false);
}
max_trx_id = page_get_max_trx_id(page_align(rec));
@@ -593,12 +591,6 @@ lock_sys_create(
{
ulint lock_sys_sz;
- srv_n_lock_wait_count = 0;
- srv_n_lock_wait_time = 0;
- srv_n_lock_max_wait_time = 0;
- srv_lock_timeout_active = FALSE;
- srv_n_lock_wait_current_count = 0;
-
lock_sys_sz = sizeof(*lock_sys)
+ OS_THREAD_MAX_N * sizeof(srv_slot_t);
@@ -618,12 +610,14 @@ lock_sys_create(
mutex_create(lock_sys_wait_mutex_key,
&lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS);
- lock_sys->rec_hash = hash_create(n_cells);
+ lock_sys->timeout_event = os_event_create();
- lock_latest_err_file = os_file_create_tmpfile();
- ut_a(lock_latest_err_file);
+ lock_sys->rec_hash = hash_create(n_cells);
- srv_timeout_event = os_event_create(NULL);
+ if (!srv_read_only_mode) {
+ lock_latest_err_file = os_file_create_tmpfile();
+ ut_a(lock_latest_err_file);
+ }
}
/*********************************************************************//**
@@ -858,13 +852,16 @@ lock_reset_lock_and_trx_wait(
/*=========================*/
lock_t* lock) /*!< in/out: record lock */
{
- ut_ad(lock->trx->lock.wait_lock == lock);
ut_ad(lock_get_wait(lock));
ut_ad(lock_mutex_own());
/* Reset the back pointer in trx to this waiting lock request */
-
- lock->trx->lock.wait_lock = NULL;
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) {
+ ut_ad(lock->trx->lock.wait_lock == lock);
+ lock->trx->lock.wait_lock = NULL;
+ } else {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ }
lock->type_mode &= ~LOCK_WAIT;
}
@@ -1476,7 +1473,7 @@ Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
@return lock or NULL */
UNIV_INLINE
-const lock_t*
+lock_t*
lock_rec_has_expl(
/*==============*/
ulint precise_mode,/*!< in: LOCK_S or LOCK_X
@@ -1489,7 +1486,7 @@ lock_rec_has_expl(
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: transaction */
{
- const lock_t* lock;
+ lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
@@ -1498,14 +1495,14 @@ lock_rec_has_expl(
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
+ lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx == trx
+ && !lock_is_wait_not_by_other(lock->type_mode)
&& lock_mode_stronger_or_eq(
lock_get_mode(lock),
static_cast<enum lock_mode>(
precise_mode & LOCK_MODE_MASK))
- && !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
|| (precise_mode & LOCK_REC_NOT_GAP)
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
@@ -1756,6 +1753,7 @@ lock_rec_create(
ut_ad(lock_mutex_own());
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
/* Non-locking autocommit read-only transactions should not set
any locks. */
@@ -1813,7 +1811,7 @@ lock_rec_create(
}
ut_ad(trx_mutex_own(trx));
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ if (lock_is_wait_not_by_other(type_mode)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@@ -1838,7 +1836,7 @@ DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
there was a deadlock, but another transaction was chosen as a victim,
and we got the lock immediately: no need to wait then */
static
-enum db_err
+dberr_t
lock_rec_enqueue_waiting(
/*=====================*/
ulint type_mode,/*!< in: lock mode this
@@ -1853,14 +1851,16 @@ lock_rec_enqueue_waiting(
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock object; NULL if a new
+ one should be created. */
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- lock_t* lock;
trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx = thr_get_trx(thr);
@@ -1893,10 +1893,20 @@ lock_rec_enqueue_waiting(
ut_ad(0);
}
- /* Enqueue the lock request that will wait to be granted, note that
- we already own the trx mutex. */
- lock = lock_rec_create(
- type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
+ if (lock == NULL) {
+ /* Enqueue the lock request that will wait
+ to be granted, note that we already own
+ the trx mutex. */
+ lock = lock_rec_create(
+ type_mode | LOCK_WAIT, block, heap_no,
+ index, trx, TRUE);
+ } else {
+ ut_ad(lock->type_mode & LOCK_WAIT);
+ ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER);
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
/* Release the mutex to obey the latching order.
This is safe, because lock_deadlock_check_and_resolve()
@@ -1979,6 +1989,7 @@ lock_rec_add_to_queue(
ut_ad(lock_mutex_own());
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
#ifdef UNIV_DEBUG
switch (type_mode & LOCK_MODE_MASK) {
case LOCK_X:
@@ -2100,6 +2111,7 @@ lock_rec_lock_fast(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
lock = lock_rec_get_first_on_page(block);
@@ -2146,7 +2158,7 @@ lock, or in the case of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
-enum db_err
+dberr_t
lock_rec_lock_slow(
/*===============*/
ibool impl, /*!< in: if TRUE, no lock is set
@@ -2163,7 +2175,8 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- enum db_err err = DB_SUCCESS;
+ lock_t* lock;
+ dberr_t err = DB_SUCCESS;
ut_ad(lock_mutex_own());
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@@ -2175,12 +2188,33 @@ lock_rec_lock_slow(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx = thr_get_trx(thr);
trx_mutex_enter(trx);
- if (lock_rec_has_expl(mode, block, heap_no, trx)) {
+ lock = lock_rec_has_expl(mode, block, heap_no, trx);
+ if (lock) {
+ if (lock->type_mode & LOCK_CONV_BY_OTHER) {
+ /* This lock or lock waiting was created by the other
+ transaction, not by the transaction (trx) itself.
+ So, the transaction (trx) should treat it collectly
+ according as whether granted or not. */
+
+ if (lock->type_mode & LOCK_WAIT) {
+ /* This lock request was not granted yet.
+ Should wait for granted. */
+
+ goto enqueue_waiting;
+ } else {
+ /* This lock request was already granted.
+ Just clearing the flag. */
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ }
+ }
+
/* The trx already has a strong enough lock on rec: do
nothing */
@@ -2193,8 +2227,10 @@ lock_rec_lock_slow(
have a lock strong enough already granted on the
record, we have to wait. */
+ ut_ad(lock == NULL);
+enqueue_waiting:
err = lock_rec_enqueue_waiting(
- mode, block, heap_no, index, thr);
+ mode, block, heap_no, lock, index, thr);
} else if (!impl) {
/* Set the requested lock on the record, note that
@@ -2220,7 +2256,7 @@ of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
-enum db_err
+dberr_t
lock_rec_lock(
/*==========*/
ibool impl, /*!< in: if TRUE, no lock is set
@@ -2246,6 +2282,7 @@ lock_rec_lock(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
/* We try a simplified and faster subroutine for the most
common cases */
@@ -2348,7 +2385,8 @@ lock_grant(
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
for it */
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)
+ && lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
que_thr_t* thr;
thr = que_thr_end_lock_wait(lock->trx);
@@ -2375,6 +2413,7 @@ lock_rec_cancel(
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2541,8 +2580,12 @@ lock_rec_reset_and_release_wait(
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
- if (lock_get_wait(lock)) {
+ if (lock_is_wait_not_by_other(lock->type_mode)) {
lock_rec_cancel(lock);
+ } else if (lock_get_wait(lock)) {
+ /* just reset LOCK_WAIT */
+ lock_rec_reset_nth_bit(lock, heap_no);
+ lock_reset_lock_and_trx_wait(lock);
} else {
lock_rec_reset_nth_bit(lock, heap_no);
}
@@ -3439,11 +3482,13 @@ lock_deadlock_start_print()
/*=======================*/
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
rewind(lock_latest_err_file);
ut_print_timestamp(lock_latest_err_file);
if (srv_print_all_deadlocks) {
+ ut_print_timestamp(stderr);
fprintf(stderr, "InnoDB: transactions deadlock detected, "
"dumping detailed information.\n");
ut_print_timestamp(stderr);
@@ -3458,10 +3503,12 @@ lock_deadlock_fputs(
/*================*/
const char* msg) /*!< in: message to print */
{
- fputs(msg, lock_latest_err_file);
+ if (!srv_read_only_mode) {
+ fputs(msg, lock_latest_err_file);
- if (srv_print_all_deadlocks) {
- fputs(msg, stderr);
+ if (srv_print_all_deadlocks) {
+ fputs(msg, stderr);
+ }
}
}
@@ -3475,24 +3522,21 @@ lock_deadlock_trx_print(
ulint max_query_len) /*!< in: max query length to print,
or 0 to use the default max length */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
- ulint heap_size;
-
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
- heap_size = mem_heap_get_size(trx->lock.lock_heap);
+ ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
mutex_enter(&trx_sys->mutex);
trx_print_low(lock_latest_err_file, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
if (srv_print_all_deadlocks) {
trx_print_low(stderr, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
}
mutex_exit(&trx_sys->mutex);
@@ -3507,6 +3551,7 @@ lock_deadlock_lock_print(
const lock_t* lock) /*!< in: record or table type lock */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_print(lock_latest_err_file, lock);
@@ -3629,6 +3674,7 @@ lock_deadlock_notify(
deadlock */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
lock_deadlock_start_print();
@@ -3648,9 +3694,15 @@ lock_deadlock_notify(
lock_deadlock_lock_print(lock);
- lock_deadlock_fputs("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+ /* It is possible that the joining transaction was granted its
+ lock when we rolled back some other waiting transaction. */
+
+ if (ctx->start->lock.wait_lock != 0) {
+ lock_deadlock_fputs(
+ "*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
- lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+ lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+ }
#ifdef UNIV_DEBUG
if (lock_print_waits) {
@@ -3669,6 +3721,7 @@ lock_deadlock_select_victim(
const lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */
{
ut_ad(lock_mutex_own());
+ ut_ad(ctx->start->lock.wait_lock != 0);
ut_ad(ctx->wait_lock->trx != ctx->start);
if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) {
@@ -3694,8 +3747,10 @@ lock_deadlock_check(
{
ut_ad(lock_mutex_own());
- /* If it is the joining transaction wait lock. */
- if (lock == ctx->start->lock.wait_lock) {
+ /* If it is the joining transaction wait lock or the joining
+ transaction was granted its lock due to deadlock detection. */
+ if (lock == ctx->start->lock.wait_lock
+ || ctx->start->lock.wait_lock == NULL) {
; /* Skip */
} else if (lock == ctx->wait_lock) {
@@ -3776,7 +3831,8 @@ lock_deadlock_push(
}
/********************************************************************//**
-Looks iteratively for a deadlock.
+Looks iteratively for a deadlock. Note: the joining transaction may
+have been granted its lock by the deadlock checks.
@return 0 if no deadlock else the victim transaction id.*/
static
trx_id_t
@@ -3811,7 +3867,9 @@ lock_deadlock_search(
/* Found a cycle. */
- lock_deadlock_notify(ctx, lock);
+ if (!srv_read_only_mode) {
+ lock_deadlock_notify(ctx, lock);
+ }
return(lock_deadlock_select_victim(ctx)->id);
@@ -3882,6 +3940,7 @@ lock_deadlock_joining_trx_print(
const lock_t* lock) /*!< in: lock trx wants */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
/* If the lock search exceeds the max step
or the max depth, the current trx will be
@@ -3968,7 +4027,9 @@ lock_deadlock_check_and_resolve(
ut_a(trx == ctx.start);
ut_a(victim_trx_id == trx->id);
- lock_deadlock_joining_trx_print(trx, lock);
+ if (!srv_read_only_mode) {
+ lock_deadlock_joining_trx_print(trx, lock);
+ }
MONITOR_INC(MONITOR_DEADLOCK);
@@ -4017,6 +4078,7 @@ lock_table_create(
ut_ad(table && trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
+ ut_ad(!(type_mode & LOCK_CONV_BY_OTHER));
/* Non-locking autocommit read-only transactions should not set
any locks. */
@@ -4203,7 +4265,7 @@ DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
transaction was chosen as a victim, and we got the lock immediately:
no need to wait then */
static
-ulint
+dberr_t
lock_table_enqueue_waiting(
/*=======================*/
ulint mode, /*!< in: lock mode this transaction is
@@ -4333,7 +4395,7 @@ Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_table(
/*=======*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -4344,7 +4406,7 @@ lock_table(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- ulint err;
+ dberr_t err;
const lock_t* wait_for;
ut_ad(table && thr);
@@ -4570,11 +4632,38 @@ lock_release(
lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
if (lock_get_type_low(lock) == LOCK_REC) {
- lock_rec_dequeue_from_page(lock);
+#ifdef UNIV_DEBUG
+ /* Check if the transcation locked a record
+ in a system table in X mode. It should have set
+ the dict_op code correctly if it did. */
+ if (lock->index->table->id < DICT_HDR_FIRST_ID
+ && lock_get_mode(lock) == LOCK_X) {
+
+ ut_ad(lock_get_mode(lock) != LOCK_IX);
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+#endif /* UNIV_DEBUG */
+
+ lock_rec_dequeue_from_page(lock);
} else {
+ dict_table_t* table;
+
+ table = lock->un_member.tab_lock.table;
+#ifdef UNIV_DEBUG
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ /* Check if the transcation locked a system table
+ in IX mode. It should have set the dict_op code
+ correctly if it did. */
+ if (table->id < DICT_HDR_FIRST_ID
+ && (lock_get_mode(lock) == LOCK_X
+ || lock_get_mode(lock) == LOCK_IX)) {
+
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+#endif /* UNIV_DEBUG */
+
if (lock_get_mode(lock) != LOCK_IS
&& trx->undo_no != 0) {
@@ -4582,8 +4671,7 @@ lock_release(
block the use of the MySQL query cache for
all currently active transactions. */
- lock->un_member.tab_lock.table
- ->query_cache_inv_trx_id = max_trx_id;
+ table->query_cache_inv_trx_id = max_trx_id;
}
lock_table_dequeue(lock);
@@ -5059,7 +5147,9 @@ lock_print_info_summary(
"LATEST DETECTED DEADLOCK\n"
"------------------------\n", file);
- ut_copy_file(file, lock_latest_err_file);
+ if (!srv_read_only_mode) {
+ ut_copy_file(file, lock_latest_err_file);
+ }
}
fputs("------------\n"
@@ -5085,6 +5175,10 @@ lock_print_info_summary(
/* Should never be in this state while the system is running. */
ut_error;
+ case PURGE_STATE_DISABLED:
+ fprintf(file, "disabled");
+ break;
+
case PURGE_STATE_RUN:
fprintf(file, "running");
/* Check if it is waiting for more data to arrive. */
@@ -5418,6 +5512,8 @@ lock_rec_queue_validate(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
ut_ad(lock_mutex_own() == locked_lock_trx_sys);
+ ut_ad(!index || dict_index_is_clust(index)
+ || !dict_index_is_online_ddl(index));
heap_no = page_rec_get_heap_no(rec);
@@ -5694,20 +5790,26 @@ lock_rec_block_validate(
If the lock exists in lock_rec_validate_page() we assert
!block->page.file_page_was_freed. */
+ buf_block_t* block;
mtr_t mtr;
- mtr_start(&mtr);
+ /* Make sure that the tablespace is not deleted while we are
+ trying to access the page. */
+ if (!fil_inc_pending_ops(space)) {
+ mtr_start(&mtr);
+ block = buf_page_get_gen(
+ space, fil_space_get_zip_size(space),
+ page_no, RW_X_LATCH, NULL,
+ BUF_GET_POSSIBLY_FREED,
+ __FILE__, __LINE__, &mtr);
- buf_block_t* block = buf_page_get_gen(
- space, fil_space_get_zip_size(space),
- page_no, RW_X_LATCH, NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ ut_ad(lock_rec_validate_page(block));
+ mtr_commit(&mtr);
- ut_ad(lock_rec_validate_page(block));
- mtr_commit(&mtr);
+ fil_decr_pending_ops(space);
+ }
}
/*********************************************************************//**
@@ -5765,7 +5867,7 @@ the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
@@ -5783,10 +5885,13 @@ lock_rec_insert_check_and_lock(
const rec_t* next_rec;
trx_t* trx;
lock_t* lock;
- ulint err;
+ dberr_t err;
ulint next_rec_heap_no;
ut_ad(block->frame == page_align(rec));
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -5803,11 +5908,9 @@ lock_rec_insert_check_and_lock(
to hold trx->mutex here. */
/* When inserting a record into an index, the table must be at
- least IX-locked or we must be building an index, in which case
- the table must be at least S-locked. */
- ut_ad(lock_table_has(trx, index->table, LOCK_IX)
- || (*index->name == TEMP_INDEX_PREFIX
- && lock_table_has(trx, index->table, LOCK_S)));
+ least IX-locked. When we are building an index, we would pass
+ BTR_NO_LOCKING_FLAG and skip the locking altogether. */
+ ut_ad(lock_table_has(trx, index->table, LOCK_IX));
lock = lock_rec_get_first(block, next_rec_heap_no);
@@ -5850,7 +5953,7 @@ lock_rec_insert_check_and_lock(
err = lock_rec_enqueue_waiting(
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
+ block, next_rec_heap_no, NULL, index, thr);
trx_mutex_exit(trx);
} else {
@@ -5871,6 +5974,9 @@ lock_rec_insert_check_and_lock(
page_update_max_trx_id(block,
buf_block_get_page_zip(block),
trx->id, mtr);
+ default:
+ /* We only care about the two return values. */
+ break;
}
#ifdef UNIV_DEBUG
@@ -5920,6 +6026,7 @@ lock_rec_convert_impl_to_expl(
this transaction. The transaction may have been
committed a long time ago. */
} else {
+ ut_ad(!dict_index_is_online_ddl(index));
trx_id = lock_sec_rec_some_has_impl(rec, index, offsets);
/* The transaction can be committed before the
trx_is_active(trx_id, NULL) check below, because we are not
@@ -5943,10 +6050,26 @@ lock_rec_convert_impl_to_expl(
if (impl_trx != NULL
&& !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
heap_no, impl_trx)) {
+ ulint type_mode = (LOCK_REC | LOCK_X
+ | LOCK_REC_NOT_GAP);
+
+ /* If the delete-marked record was locked already,
+ we should reserve lock waiting for impl_trx as
+ implicit lock. Because cannot lock at this moment.*/
+
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
+ && lock_rec_other_has_conflicting(
+ static_cast<enum lock_mode>
+ (LOCK_X | LOCK_REC_NOT_GAP), block,
+ heap_no, impl_trx)) {
+
+ type_mode |= (LOCK_WAIT
+ | LOCK_CONV_BY_OTHER);
+ }
lock_rec_add_to_queue(
- LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, impl_trx, FALSE);
+ type_mode, block, heap_no, index,
+ impl_trx, FALSE);
}
lock_mutex_exit();
@@ -5962,7 +6085,7 @@ lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -5974,7 +6097,7 @@ lock_clust_rec_modify_check_and_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ulint heap_no;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -6020,7 +6143,7 @@ Checks if locks of other transactions prevent an immediate modify (delete
mark or delete unmark) of a secondary index record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6032,13 +6155,15 @@ lock_sec_rec_modify_check_and_lock(
clustered index record first: see the
comment below */
dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint err;
+ dberr_t err;
ulint heap_no;
ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
ut_ad(block->frame == page_align(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -6103,7 +6228,7 @@ secondary index record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6124,10 +6249,11 @@ lock_sec_rec_read_check_and_lock(
LOCK_REC_NOT_GAP */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
- ulint heap_no;
+ dberr_t err;
+ ulint heap_no;
ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index));
ut_ad(block->frame == page_align(rec));
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -6180,7 +6306,7 @@ lock on the record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6201,8 +6327,8 @@ lock_clust_rec_read_check_and_lock(
LOCK_REC_NOT_GAP */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
- ulint heap_no;
+ dberr_t err;
+ ulint heap_no;
ut_ad(dict_index_is_clust(index));
ut_ad(block->frame == page_align(rec));
@@ -6230,7 +6356,8 @@ lock_clust_rec_read_check_and_lock(
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
@@ -6251,7 +6378,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6274,7 +6401,7 @@ lock_clust_rec_read_check_and_lock_alt(
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- ulint err;
+ dberr_t err;
rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets,
@@ -6469,6 +6596,8 @@ lock_get_table(
{
switch (lock_get_type_low(lock)) {
case LOCK_REC:
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index->table);
case LOCK_TABLE:
return(lock->un_member.tab_lock.table);
@@ -6521,6 +6650,8 @@ lock_rec_get_index(
const lock_t* lock) /*!< in: lock */
{
ut_a(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index);
}
@@ -6536,6 +6667,8 @@ lock_rec_get_index_name(
const lock_t* lock) /*!< in: lock */
{
ut_a(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index->name);
}
@@ -6581,6 +6714,7 @@ lock_cancel_waiting_and_release(
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
lock->trx->lock.cancel = TRUE;
@@ -6656,10 +6790,14 @@ lock_trx_release_locks(
{
assert_trx_in_list(trx);
- if (UNIV_UNLIKELY(trx_state_eq(trx, TRX_STATE_PREPARED))) {
+ if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
mutex_enter(&trx_sys->mutex);
ut_a(trx_sys->n_prepared_trx > 0);
trx_sys->n_prepared_trx--;
+ if (trx->is_recovered) {
+ ut_a(trx_sys->n_prepared_recovered_trx > 0);
+ trx_sys->n_prepared_recovered_trx--;
+ }
mutex_exit(&trx_sys->mutex);
} else {
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
@@ -6714,12 +6852,12 @@ was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
lock_trx_handle_wait(
/*=================*/
trx_t* trx) /*!< in/out: trx lock state */
{
- enum db_err err;
+ dberr_t err;
lock_mutex_enter();
@@ -6800,6 +6938,8 @@ lock_table_locks_lookup(
ut_a(lock->trx == trx);
if (lock_get_type_low(lock) == LOCK_REC) {
+ ut_ad(!dict_index_is_online_ddl(lock->index)
+ || dict_index_is_clust(lock->index));
if (lock->index->table == table) {
return(lock);
}
@@ -6828,18 +6968,89 @@ lock_table_has_locks(
lock_mutex_enter();
+ has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
+
#ifdef UNIV_DEBUG
- mutex_enter(&trx_sys->mutex);
+ if (!has_locks) {
+ mutex_enter(&trx_sys->mutex);
- ut_ad(lock_table_locks_lookup(table, &trx_sys->rw_trx_list) == NULL);
- ut_ad(lock_table_locks_lookup(table, &trx_sys->ro_trx_list) == NULL);
+ ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
+ ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list));
- mutex_exit(&trx_sys->mutex);
+ mutex_exit(&trx_sys->mutex);
+ }
#endif /* UNIV_DEBUG */
- has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
-
lock_mutex_exit();
return(has_locks);
}
+
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Check if the transaction holds any locks on the sys tables
+or its records.
+@return the strongest lock found on any sys table or 0 for none */
+UNIV_INTERN
+const lock_t*
+lock_trx_has_sys_table_locks(
+/*=========================*/
+ const trx_t* trx) /*!< in: transaction to check */
+{
+ lint i;
+ const lock_t* strongest_lock = 0;
+ lock_mode strongest = LOCK_NONE;
+
+ lock_mutex_enter();
+
+ /* Find a valid mode. Note: ib_vector_size() can be 0. */
+ for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
+ const lock_t* lock;
+
+ lock = *static_cast<const lock_t**>(
+ ib_vector_get(trx->lock.table_locks, i));
+
+ if (lock != NULL
+ && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
+
+ strongest = lock_get_mode(lock);
+ ut_ad(strongest != LOCK_NONE);
+ strongest_lock = lock;
+ break;
+ }
+ }
+
+ if (strongest == LOCK_NONE) {
+ lock_mutex_exit();
+ return(NULL);
+ }
+
+ for (/* No op */; i >= 0; --i) {
+ const lock_t* lock;
+
+ lock = *static_cast<const lock_t**>(
+ ib_vector_get(trx->lock.table_locks, i));
+
+ if (lock == NULL) {
+ continue;
+ }
+
+ ut_ad(trx == lock->trx);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ ut_ad(lock->un_member.tab_lock.table != NULL);
+
+ lock_mode mode = lock_get_mode(lock);
+
+ if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
+ && lock_mode_stronger_or_eq(mode, strongest)) {
+
+ strongest = mode;
+ strongest_lock = lock;
+ }
+ }
+
+ lock_mutex_exit();
+
+ return(strongest_lock);
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index 99059f19813..fc355d8bb6d 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -33,14 +33,6 @@ Created 25/5/2010 Sunny Bains
#include "ha_prototypes.h"
#include "lock0priv.h"
-UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
-UNIV_INTERN ulint srv_n_lock_wait_count = 0;
-UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
-UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
-UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
-
-UNIV_INTERN os_event_t srv_timeout_event;
-
/*********************************************************************//**
Print the contents of the lock_sys_t::waiting_threads array. */
static
@@ -156,7 +148,7 @@ lock_wait_table_reserve_slot(
slot->thr->slot = slot;
if (slot->event == NULL) {
- slot->event = os_event_create(NULL);
+ slot->event = os_event_create();
ut_a(slot->event);
}
@@ -257,8 +249,8 @@ lock_wait_suspend_thread(
slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout);
if (thr->lock_state == QUE_THR_LOCK_ROW) {
- srv_n_lock_wait_count++;
- srv_n_lock_wait_current_count++;
+ srv_stats.n_lock_wait_count.inc();
+ srv_stats.n_lock_wait_current_count.inc();
if (ut_usectime(&sec, &ms) == -1) {
start_time = -1;
@@ -269,7 +261,7 @@ lock_wait_suspend_thread(
/* Wake the lock timeout monitor thread, if it is suspended */
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
lock_wait_mutex_exit();
trx_mutex_exit(trx);
@@ -282,6 +274,8 @@ lock_wait_suspend_thread(
case RW_S_LATCH:
/* Release foreign key check latch */
row_mysql_unfreeze_data_dictionary(trx);
+
+ DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep");
break;
default:
/* There should never be a lock wait when the
@@ -341,14 +335,16 @@ lock_wait_suspend_thread(
diff_time = (ulint) (finish_time - start_time);
- srv_n_lock_wait_current_count--;
- srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
+ srv_stats.n_lock_wait_current_count.dec();
+ srv_stats.n_lock_wait_time.add(diff_time);
- if (diff_time > srv_n_lock_max_wait_time &&
- /* only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- start_time != -1 && finish_time != -1) {
- srv_n_lock_max_wait_time = diff_time;
+ /* Only update the variable if we successfully
+ retrieved the start and finish times. See Bug#36819. */
+ if (diff_time > lock_sys->n_lock_max_wait_time
+ && start_time != -1
+ && finish_time != -1) {
+
+ lock_sys->n_lock_max_wait_time = diff_time;
}
}
@@ -463,11 +459,15 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
os_thread_create */
{
ib_int64_t sig_count = 0;
+ os_event_t event = lock_sys->timeout_event;
+
+ ut_ad(!srv_read_only_mode);
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_lock_timeout_thread_key);
-#endif
- srv_lock_timeout_active = TRUE;
+#endif /* UNIV_PFS_THREAD */
+
+ lock_sys->timeout_thread_active = true;
do {
srv_slot_t* slot;
@@ -475,7 +475,8 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
/* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */
- os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
+ os_event_wait_time_low(event, 1000000, sig_count);
+ sig_count = os_event_reset(event);
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
break;
@@ -500,13 +501,13 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
}
}
- sig_count = os_event_reset(srv_timeout_event);
+ sig_count = os_event_reset(event);
lock_wait_mutex_exit();
} while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
- srv_lock_timeout_active = FALSE;
+ lock_sys->timeout_thread_active = false;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 5e4a9dcf515..b6909f4771a 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -37,7 +37,6 @@ Created 12/9/1995 Heikki Tuuri
#endif
#ifndef UNIV_HOTBACKUP
-#include "ha_prototypes.h"
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
@@ -49,6 +48,7 @@ Created 12/9/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0sys.h"
#include "trx0trx.h"
+#include "ha_prototypes.h"
#include "srv0mon.h"
/*
@@ -223,7 +223,7 @@ loop:
log_buffer_flush_to_disk();
- srv_log_waits++;
+ srv_stats.log_waits.inc();
ut_ad(++count < 50);
@@ -328,7 +328,7 @@ part_loop:
goto part_loop;
}
- srv_log_write_requests++;
+ srv_stats.log_write_requests.inc();
}
/************************************************************//**
@@ -748,9 +748,6 @@ log_init(void)
log_sys->lsn = LOG_START_LSN;
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
@@ -784,11 +781,11 @@ log_init(void)
log_sys->n_pending_writes = 0;
- log_sys->no_flush_event = os_event_create(NULL);
+ log_sys->no_flush_event = os_event_create();
os_event_set(log_sys->no_flush_event);
- log_sys->one_flushed_event = os_event_create(NULL);
+ log_sys->one_flushed_event = os_event_create();
os_event_set(log_sys->one_flushed_event);
@@ -796,7 +793,6 @@ log_init(void)
log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = log_sys->lsn;
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, 0);
log_sys->n_pending_checkpoint_writes = 0;
@@ -832,7 +828,7 @@ log_init(void)
/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
- log_sys->archiving_on = os_event_create(NULL);
+ log_sys->archiving_on = os_event_create();
#endif /* UNIV_LOG_ARCHIVE */
/*----------------------------*/
@@ -1163,7 +1159,7 @@ log_group_file_header_flush(
MONITOR_INC(MONITOR_LOG_IO);
- srv_os_log_pending_writes++;
+ srv_stats.os_log_pending_writes.inc();
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
(ulint) (dest_offset / UNIV_PAGE_SIZE),
@@ -1171,7 +1167,7 @@ log_group_file_header_flush(
OS_FILE_LOG_BLOCK_SIZE,
buf, group);
- srv_os_log_pending_writes--;
+ srv_stats.os_log_pending_writes.dec();
}
}
@@ -1238,8 +1234,9 @@ loop:
log_group_file_header_flush(group, (ulint)
(next_offset / group->file_size),
start_lsn);
- srv_os_log_written += OS_FILE_LOG_BLOCK_SIZE;
- srv_log_writes++;
+ srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
+
+ srv_stats.log_writes.inc();
}
if ((next_offset % group->file_size) + len > group->file_size) {
@@ -1289,7 +1286,7 @@ loop:
MONITOR_INC(MONITOR_LOG_IO);
- srv_os_log_pending_writes++;
+ srv_stats.os_log_pending_writes.inc();
ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
@@ -1298,10 +1295,10 @@ loop:
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
group);
- srv_os_log_pending_writes--;
+ srv_stats.os_log_pending_writes.dec();
- srv_os_log_written += write_len;
- srv_log_writes++;
+ srv_stats.os_log_written.add(write_len);
+ srv_stats.log_writes.inc();
}
if (write_len < len) {
@@ -1345,6 +1342,8 @@ log_write_up_to(
ib_uint64_t write_lsn;
ib_uint64_t flush_lsn;
+ ut_ad(!srv_read_only_mode);
+
if (recv_no_ibuf_operations) {
/* Recovery is running and no operations on the log files are
allowed yet (the variable name .._no_ibuf_.. is misleading) */
@@ -1560,6 +1559,7 @@ log_buffer_flush_to_disk(void)
{
lsn_t lsn;
+ ut_ad(!srv_read_only_mode);
mutex_enter(&(log_sys->mutex));
lsn = log_sys->lsn;
@@ -1626,15 +1626,16 @@ log_flush_margin(void)
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool. NOTE: this function may only be called if the calling thread owns
no synchronization objects!
-@return FALSE if there was a flush batch of the same type running,
+@return false if there was a flush batch of the same type running,
which means that we could not start this flush batch */
static
-ibool
+bool
log_preflush_pool_modified_pages(
/*=============================*/
lsn_t new_oldest) /*!< in: try to advance oldest_modified_lsn
at least to this lsn */
{
+ bool success;
ulint n_pages;
if (recv_recovery_on) {
@@ -1650,13 +1651,12 @@ log_preflush_pool_modified_pages(
recv_apply_hashed_log_recs(TRUE);
}
- n_pages = buf_flush_list(ULINT_MAX, new_oldest);
+ success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- if (n_pages == ULINT_UNDEFINED) {
-
- return(FALSE);
+ if (!success) {
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
}
MONITOR_INC_VALUE_CUMULATIVE(
@@ -1665,7 +1665,7 @@ log_preflush_pool_modified_pages(
MONITOR_FLUSH_SYNC_PAGES,
n_pages);
- return(TRUE);
+ return(success);
}
/******************************************************//**
@@ -1765,6 +1765,7 @@ log_group_checkpoint(
byte* buf;
ulint i;
+ ut_ad(!srv_read_only_mode);
ut_ad(mutex_own(&(log_sys->mutex)));
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
@@ -1952,12 +1953,13 @@ log_groups_write_checkpoint_info(void)
ut_ad(mutex_own(&(log_sys->mutex)));
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_group_checkpoint(group);
+ if (!srv_read_only_mode) {
+ for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group;
+ group = UT_LIST_GET_NEXT(log_groups, group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
+ log_group_checkpoint(group);
+ }
}
}
@@ -1982,6 +1984,8 @@ log_checkpoint(
{
lsn_t oldest_lsn;
+ ut_ad(!srv_read_only_mode);
+
if (recv_recovery_is_on()) {
recv_apply_hashed_log_recs(TRUE);
}
@@ -2088,38 +2092,6 @@ log_make_checkpoint_at(
}
/****************************************************************//**
-Checks if an asynchronous flushing of dirty pages is required in the
-background. This function is only called from the page cleaner thread.
-@return lsn to which the flushing should happen or LSN_MAX
-if flushing is not required */
-UNIV_INTERN
-lsn_t
-log_async_flush_lsn(void)
-/*=====================*/
-{
- lsn_t age;
- lsn_t oldest_lsn;
- lsn_t new_lsn = LSN_MAX;
-
- mutex_enter(&log_sys->mutex);
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- ut_a(log_sys->lsn >= oldest_lsn);
- age = log_sys->lsn - oldest_lsn;
-
- if (age > log_sys->max_modified_age_async) {
- /* An asynchronous preflush is required */
- ut_a(log_sys->lsn >= log_sys->max_modified_age_async);
- new_lsn = log_sys->lsn - log_sys->max_modified_age_async;
- }
-
- mutex_exit(&log_sys->mutex);
-
- return(new_lsn);
-}
-
-/****************************************************************//**
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for a
checkpoint. NOTE: this function may only be called if the calling thread
@@ -2136,7 +2108,7 @@ log_checkpoint_margin(void)
lsn_t oldest_lsn;
ibool checkpoint_sync;
ibool do_checkpoint;
- ibool success;
+ bool success;
loop:
checkpoint_sync = FALSE;
do_checkpoint = FALSE;
@@ -3131,10 +3103,8 @@ logs_empty_and_mark_files_at_shutdown(void)
const char* thread_name;
ibool server_busy;
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Starting shutdown...\n");
- }
+ ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
+
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
@@ -3155,9 +3125,8 @@ loop:
threads check will be done later. */
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %s to exit\n",
- thread_name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %s to exit", thread_name);
count = 0;
}
@@ -3174,9 +3143,8 @@ loop:
if (total_trx > 0) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %lu "
- "active transactions to finish\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %lu active transactions to finish",
(ulong) total_trx);
count = 0;
@@ -3221,9 +3189,9 @@ loop:
break;
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %s "
- "to be suspended\n", thread_type);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %s to be suspended",
+ thread_type);
count = 0;
}
@@ -3239,10 +3207,9 @@ loop:
++count;
os_thread_sleep(100000);
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Waiting for page_cleaner to "
- "finish flushing of buffer pool\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for page_cleaner to "
+ "finish flushing of buffer pool");
count = 0;
}
}
@@ -3257,10 +3224,9 @@ loop:
if (server_busy) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Pending checkpoint_writes: %lu\n"
- " InnoDB: Pending log flush writes: %lu\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Pending checkpoint_writes: %lu. "
+ "Pending log flush writes: %lu",
(ulong) log_sys->n_pending_checkpoint_writes,
(ulong) log_sys->n_pending_writes);
count = 0;
@@ -3272,9 +3238,8 @@ loop:
if (pending_io) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %lu buffer page "
- "I/Os to complete\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %lu buffer page I/Os to complete",
(ulong) pending_io);
count = 0;
}
@@ -3286,41 +3251,50 @@ loop:
log_archive_all();
#endif /* UNIV_LOG_ARCHIVE */
if (srv_fast_shutdown == 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL has requested a very fast shutdown"
- " without flushing "
- "the InnoDB buffer pool to data files."
- " At the next mysqld startup "
- "InnoDB will do a crash recovery!\n");
-
- /* In this fastest shutdown we do not flush the buffer pool:
- it is essentially a 'crash' of the InnoDB server. Make sure
- that the log is all flushed to disk, so that we can recover
- all committed transactions in a crash recovery. We must not
- write the lsn stamps to the data files, since at a startup
- InnoDB deduces from the stamps if the previous shutdown was
- clean. */
-
- log_buffer_flush_to_disk();
-
- /* Check that the background threads stay suspended */
- thread_name = srv_any_background_threads_are_active();
- if (thread_name != NULL) {
- fprintf(stderr,
- "InnoDB: Warning: background thread %s"
- " woke up during shutdown\n", thread_name);
- goto loop;
+ if (!srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "MySQL has requested a very fast shutdown "
+ "without flushing the InnoDB buffer pool to "
+ "data files. At the next mysqld startup "
+ "InnoDB will do a crash recovery!");
+
+ /* In this fastest shutdown we do not flush the
+ buffer pool:
+
+ it is essentially a 'crash' of the InnoDB server.
+ Make sure that the log is all flushed to disk, so
+ that we can recover all committed transactions in
+ a crash recovery. We must not write the lsn stamps
+ to the data files, since at a startup InnoDB deduces
+ from the stamps if the previous shutdown was clean. */
+
+ log_buffer_flush_to_disk();
+
+ /* Check that the background threads stay suspended */
+ thread_name = srv_any_background_threads_are_active();
+
+ if (thread_name != NULL) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Background thread %s woke up "
+ "during shutdown", thread_name);
+ goto loop;
+ }
}
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
+
fil_close_all_files();
+
thread_name = srv_any_background_threads_are_active();
+
ut_a(!thread_name);
+
return;
}
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ if (!srv_read_only_mode) {
+ log_make_checkpoint_at(LSN_MAX, TRUE);
+ }
mutex_enter(&log_sys->mutex);
@@ -3356,15 +3330,17 @@ loop:
/* Check that the background threads stay suspended */
thread_name = srv_any_background_threads_are_active();
if (thread_name != NULL) {
- fprintf(stderr,
- "InnoDB: Warning: background thread %s"
- " woke up during shutdown\n", thread_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Background thread %s woke up during shutdown",
+ thread_name);
goto loop;
}
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_flush_file_spaces(FIL_LOG);
+ if (!srv_read_only_mode) {
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_LOG);
+ }
/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
pool: therefore it is essential that the buffer pool has been
@@ -3374,9 +3350,8 @@ loop:
if (!buf_all_freed()) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for dirty buffer "
- "pages to be flushed\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for dirty buffer pages to be flushed");
count = 0;
}
@@ -3386,31 +3361,38 @@ loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Make some checks that the server really is quiet */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
+ srv_thread_type type = srv_get_active_thread_type();
+ ut_a(type == SRV_NONE);
+
+ bool freed = buf_all_freed();
+ ut_a(freed);
- ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
if (lsn < srv_start_lsn) {
- fprintf(stderr,
- "InnoDB: Error: log sequence number"
- " at shutdown " LSN_PF "\n"
- "InnoDB: is lower than at startup " LSN_PF "!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log sequence number at shutdown " LSN_PF " "
+ "is lower than at startup " LSN_PF "!",
lsn, srv_start_lsn);
}
srv_shutdown_lsn = lsn;
- fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+ if (!srv_read_only_mode) {
+ fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
- fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ }
fil_close_all_files();
/* Make some checks that the server really is quiet */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
+ type = srv_get_active_thread_type();
+ ut_a(type == SRV_NONE);
+
+ freed = buf_all_freed();
+ ut_a(freed);
- ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
}
@@ -3544,7 +3526,7 @@ log_refresh_stats(void)
log_sys->last_printout_time = time(NULL);
}
-/**********************************************************************
+/********************************************************//**
Closes a log group. */
static
void
@@ -3574,12 +3556,12 @@ log_group_close(
mem_free(group);
}
-/**********************************************************
-Shutdown the log system but do not release all the memory. */
+/********************************************************//**
+Closes all log groups. */
UNIV_INTERN
void
-log_shutdown(void)
-/*==============*/
+log_group_close_all(void)
+/*=====================*/
{
log_group_t* group;
@@ -3593,6 +3575,16 @@ log_shutdown(void)
log_group_close(prev_group);
}
+}
+
+/********************************************************//**
+Shutdown the log system but do not release all the memory. */
+UNIV_INTERN
+void
+log_shutdown(void)
+/*==============*/
+{
+ log_group_close_all();
mem_free(log_sys->buf_ptr);
log_sys->buf_ptr = NULL;
@@ -3610,7 +3602,7 @@ log_shutdown(void)
#ifdef UNIV_LOG_ARCHIVE
rw_lock_free(&log_sys->archive_lock);
- os_event_create(log_sys->archiving_on);
+ os_event_create();
#endif /* UNIV_LOG_ARCHIVE */
#ifdef UNIV_LOG_DEBUG
@@ -3620,7 +3612,7 @@ log_shutdown(void)
recv_sys_close();
}
-/**********************************************************
+/********************************************************//**
Free the log system data structures. */
UNIV_INTERN
void
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index f914fc2676c..8cefa9e4b70 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,8 +43,6 @@ Created 9/20/1997 Heikki Tuuri
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
-#include "buf0dblwr.h"
-#include "srv0mon.h"
#ifndef UNIV_HOTBACKUP
# include "buf0rea.h"
# include "srv0srv.h"
@@ -158,6 +157,20 @@ UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key;
UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
+#ifndef UNIV_HOTBACKUP
+# ifdef UNIV_PFS_THREAD
+UNIV_INTERN mysql_pfs_key_t recv_writer_thread_key;
+# endif /* UNIV_PFS_THREAD */
+
+# ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t recv_writer_mutex_key;
+# endif /* UNIV_PFS_MUTEX */
+
+/** Flag indicating if recv_writer thread is active. */
+UNIV_INTERN bool recv_writer_thread_active = false;
+UNIV_INTERN os_thread_t recv_writer_thread_handle = 0;
+#endif /* !UNIV_HOTBACKUP */
+
/* prototypes */
#ifndef UNIV_HOTBACKUP
@@ -186,6 +199,11 @@ recv_sys_create(void)
mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
+#ifndef UNIV_HOTBACKUP
+ mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
+ SYNC_LEVEL_VARYING);
+#endif /* !UNIV_HOTBACKUP */
+
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
}
@@ -214,6 +232,11 @@ recv_sys_close(void)
mem_free(recv_sys->last_block_buf_start);
}
+#ifndef UNIV_HOTBACKUP
+ ut_ad(!recv_writer_thread_active);
+ mutex_free(&recv_sys->writer_mutex);
+#endif /* !UNIV_HOTBACKUP */
+
mutex_free(&recv_sys->mutex);
mem_free(recv_sys);
@@ -290,6 +313,58 @@ recv_sys_var_init(void)
recv_max_page_lsn = 0;
}
+
+/******************************************************************//**
+recv_writer thread tasked with flushing dirty pages from the buffer
+pools.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(recv_writer_thread)(
+/*===============================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(recv_writer_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ recv_writer_thread_active = true;
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+
+ os_thread_sleep(100000);
+
+ mutex_enter(&recv_sys->writer_mutex);
+
+ if (!recv_recovery_on) {
+ mutex_exit(&recv_sys->writer_mutex);
+ break;
+ }
+
+ /* Flush pages from end of LRU if required */
+ buf_flush_LRU_tail();
+
+ mutex_exit(&recv_sys->writer_mutex);
+ }
+
+ recv_writer_thread_active = false;
+
+ /* We count the number of threads in os_thread_exit().
+ A created thread should always use that to exit and not
+ use return() to exit. */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
#endif /* !UNIV_HOTBACKUP */
/************************************************************
@@ -310,9 +385,7 @@ recv_sys_init(
flush_list during recovery process.
As this initialization is done while holding the buffer pool
mutex we perform it before acquiring recv_sys->mutex. */
-#ifndef UNIV_HOTBACKUP
buf_flush_init_flush_rbt();
-#endif /* !UNIV_HOTBACKUP */
mutex_enter(&(recv_sys->mutex));
@@ -406,6 +479,7 @@ recv_sys_debug_free(void)
}
# endif /* UNIV_LOG_DEBUG */
+# ifdef UNIV_LOG_ARCHIVE
/********************************************************//**
Truncates possible corrupted or extra records from a log group. */
static
@@ -427,7 +501,6 @@ recv_truncate_group(
lsn_t finish_lsn1;
lsn_t finish_lsn2;
lsn_t finish_lsn;
- ulint i;
if (archived_lsn == LSN_MAX) {
/* Checkpoint was taken in the NOARCHIVELOG mode */
@@ -455,11 +528,7 @@ recv_truncate_group(
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
+ memset(log_sys->buf, 0, RECV_SCAN_SIZE);
start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
@@ -499,11 +568,7 @@ recv_truncate_group(
return;
}
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
+ memset(log_sys->buf, 0, RECV_SCAN_SIZE);
start_lsn = end_lsn;
}
@@ -560,6 +625,7 @@ recv_copy_group(
start_lsn = end_lsn;
}
}
+# endif /* UNIV_LOG_ARCHIVE */
/********************************************************//**
Copies a log segment from the most up-to-date log group to the other log
@@ -570,10 +636,12 @@ static
void
recv_synchronize_groups(
/*====================*/
- log_group_t* up_to_date_group) /*!< in: the most up-to-date
+#ifdef UNIV_LOG_ARCHIVE
+ log_group_t* up_to_date_group /*!< in: the most up-to-date
log group */
+#endif
+ )
{
- log_group_t* group;
lsn_t start_lsn;
lsn_t end_lsn;
lsn_t recovered_lsn;
@@ -590,11 +658,17 @@ recv_synchronize_groups(
ut_a(start_lsn != end_lsn);
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
- up_to_date_group, start_lsn, end_lsn);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+#ifdef UNIV_LOG_ARCHIVE
+ up_to_date_group,
+#else /* UNIV_LOG_ARCHIVE */
+ UT_LIST_GET_FIRST(log_sys->log_groups),
+#endif /* UNIV_LOG_ARCHIVE */
+ start_lsn, end_lsn);
- while (group) {
+ for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group;
+ group = UT_LIST_GET_NEXT(log_groups, group)) {
+#ifdef UNIV_LOG_ARCHIVE
if (group != up_to_date_group) {
/* Copy log data if needed */
@@ -602,13 +676,11 @@ recv_synchronize_groups(
recv_copy_group(group, up_to_date_group,
recovered_lsn);
}
-
+#endif /* UNIV_LOG_ARCHIVE */
/* Update the fields in the group struct to correspond to
recovered_lsn */
log_group_set_fields(group, recovered_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
}
/* Copy the checkpoint info to the groups; remember that we have
@@ -661,8 +733,8 @@ recv_check_cp_is_consistent(
/********************************************************//**
Looks for the maximum consistent checkpoint from the log groups.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
recv_find_max_checkpoint(
/*=====================*/
log_group_t** max_group, /*!< out: max group */
@@ -958,8 +1030,11 @@ recv_parse_or_apply_log_rec_body(
not NULL, then the log record is
applied to the page, and the log
record should be complete then */
- mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
+ mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
if and only if block is non-NULL */
+ ulint space_id)
+ /*!< in: tablespace id obtained by
+ parsing initial log record */
{
dict_index_t* index = NULL;
page_t* page;
@@ -1151,18 +1226,22 @@ recv_parse_or_apply_log_rec_body(
ptr, end_ptr, block, index, mtr);
}
break;
- case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
+ case MLOG_PAGE_REORGANIZE:
+ case MLOG_COMP_PAGE_REORGANIZE:
+ case MLOG_ZIP_PAGE_REORGANIZE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
- type == MLOG_COMP_PAGE_REORGANIZE,
+ type != MLOG_PAGE_REORGANIZE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
- ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
- block, mtr);
+ ptr = btr_parse_page_reorganize(
+ ptr, end_ptr, index,
+ type == MLOG_ZIP_PAGE_REORGANIZE,
+ block, mtr);
}
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
@@ -1231,8 +1310,11 @@ recv_parse_or_apply_log_rec_body(
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
- case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
+ ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
+ space_id, 0);
+ break;
+ case MLOG_FILE_CREATE:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
@@ -1257,6 +1339,16 @@ recv_parse_or_apply_log_rec_body(
ptr = page_zip_parse_compress(ptr, end_ptr,
page, page_zip);
break;
+ case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
+ if (NULL != (ptr = mlog_parse_index(
+ ptr, end_ptr, TRUE, &index))) {
+
+ ut_a(!page || ((ibool)!!page_is_comp(page)
+ == dict_table_is_comp(index->table)));
+ ptr = page_zip_parse_compress_no_data(
+ ptr, end_ptr, page, page_zip, index);
+ }
+ break;
default:
ptr = NULL;
recv_sys->found_corrupt_log = TRUE;
@@ -1611,7 +1703,8 @@ recv_recover_page_func(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
- block, &mtr);
+ block, &mtr,
+ recv_addr->space);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
@@ -1740,7 +1833,6 @@ recv_apply_hashed_log_recs(
{
recv_addr_t* recv_addr;
ulint i;
- ulint n_pages;
ibool has_printed = FALSE;
mtr_t mtr;
loop:
@@ -1778,11 +1870,11 @@ loop:
if (recv_addr->state == RECV_NOT_PROCESSED) {
if (!has_printed) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Starting an"
- " apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Starting an apply batch"
+ " of log records"
+ " to the database...");
+ fputs("InnoDB: Progress in percent: ",
stderr);
has_printed = TRUE;
}
@@ -1839,6 +1931,8 @@ loop:
}
if (!allow_ibuf) {
+ bool success;
+
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
@@ -1846,13 +1940,24 @@ loop:
mutex_exit(&(recv_sys->mutex));
mutex_exit(&(log_sys->mutex));
- n_pages = buf_flush_list(ULINT_MAX, LSN_MAX);
- ut_a(n_pages != ULINT_UNDEFINED);
+ /* Stop the recv_writer thread from issuing any LRU
+ flush batches. */
+ mutex_enter(&recv_sys->writer_mutex);
+
+ /* Wait for any currently run batch to end. */
+ buf_flush_wait_LRU_batch_end();
+
+ success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+
+ ut_a(success);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
buf_pool_invalidate();
+ /* Allow batches from recv_writer thread. */
+ mutex_exit(&recv_sys->writer_mutex);
+
mutex_enter(&(log_sys->mutex));
mutex_enter(&(recv_sys->mutex));
ut_d(recv_no_log_write = FALSE);
@@ -1892,9 +1997,10 @@ recv_apply_log_recs_for_backup(void)
block = back_block1;
- fputs("InnoDB: Starting an apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Starting an apply batch of log records to the database...");
+
+ fputs("InnoDB: Progress in percent: ", stderr);
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
@@ -2079,7 +2185,7 @@ recv_parse_log_rec(
#endif /* UNIV_LOG_LSN_DEBUG */
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL);
+ NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0);
@@ -2686,11 +2792,21 @@ recv_scan_log_recs(
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: Log scan progressed"
- " past the checkpoint lsn " LSN_PF "\n",
- recv_sys->scanned_lsn);
- recv_init_crash_recovery();
+ if (!srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Log scan progressed past the "
+ "checkpoint lsn " LSN_PF "",
+ recv_sys->scanned_lsn);
+
+ recv_init_crash_recovery();
+ } else {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Recovery skipped, "
+ "--innodb-read-only set!");
+
+ return(TRUE);
+ }
}
#endif /* !UNIV_HOTBACKUP */
@@ -2838,20 +2954,15 @@ void
recv_init_crash_recovery(void)
/*==========================*/
{
+ ut_ad(!srv_read_only_mode);
ut_a(!recv_needed_recovery);
recv_needed_recovery = TRUE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Database was not"
- " shut down normally!\n"
- "InnoDB: Starting crash recovery.\n");
-
- fprintf(stderr,
- "InnoDB: Reading tablespace information"
- " from the .ibd files...\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Database was not shutdown normally!");
+ ib_logf(IB_LOG_LEVEL_INFO, "Starting crash recovery.");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Reading tablespace information from the .ibd files...");
fil_load_single_table_tablespaces();
@@ -2862,11 +2973,12 @@ recv_init_crash_recovery(void)
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: Restoring possible"
- " half-written data pages from"
- " the doublewrite\n"
- "InnoDB: buffer...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Restoring possible half-written data pages ");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "from the doublewrite buffer...");
+
buf_dblwr_init_or_restore_pages(TRUE);
}
}
@@ -2878,7 +2990,7 @@ recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
#ifdef UNIV_LOG_ARCHIVE
@@ -2890,19 +3002,18 @@ recv_recovery_from_checkpoint_start_func(
{
log_group_t* group;
log_group_t* max_cp_group;
- log_group_t* up_to_date_group;
ulint max_cp_field;
lsn_t checkpoint_lsn;
ib_uint64_t checkpoint_no;
- lsn_t old_scanned_lsn;
lsn_t group_scanned_lsn = 0;
lsn_t contiguous_lsn;
#ifdef UNIV_LOG_ARCHIVE
+ log_group_t* up_to_date_group;
lsn_t archived_lsn;
#endif /* UNIV_LOG_ARCHIVE */
byte* buf;
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
- ulint err;
+ dberr_t err;
#ifdef UNIV_LOG_ARCHIVE
ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
@@ -2923,10 +3034,10 @@ recv_recovery_from_checkpoint_start_func(
}
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
- fprintf(stderr,
- "InnoDB: Skipping log redo\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The user has set SRV_FORCE_NO_LOG_REDO on, "
+ "skipping log redo");
return(DB_SUCCESS);
}
@@ -2967,17 +3078,24 @@ recv_recovery_from_checkpoint_start_func(
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
+
+ if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot restore from ibbackup, InnoDB running "
+ "in read-only mode!");
+
+ return(DB_ERROR);
+ }
+
/* This log file was created by ibbackup --restore: print
a note to the user about it */
- fprintf(stderr,
- "InnoDB: The log file was created by"
- " ibbackup --apply-log at\n"
- "InnoDB: %s\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The log file was created by ibbackup --apply-log "
+ "at %s. The following crash recovery is part of a "
+ "normal restore.",
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
- fprintf(stderr,
- "InnoDB: NOTE: the following crash recovery"
- " is part of a normal restore.\n");
/* Wipe over the label now */
@@ -3017,9 +3135,9 @@ recv_recovery_from_checkpoint_start_func(
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE);
+#ifdef UNIV_LOG_ARCHIVE
if (TYPE_CHECKPOINT) {
up_to_date_group = max_cp_group;
-#ifdef UNIV_LOG_ARCHIVE
} else {
ulint capacity;
@@ -3055,8 +3173,8 @@ recv_recovery_from_checkpoint_start_func(
group->scanned_lsn = group_scanned_lsn;
up_to_date_group = group;
-#endif /* UNIV_LOG_ARCHIVE */
}
+#endif /* UNIV_LOG_ARCHIVE */
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
@@ -3071,19 +3189,21 @@ recv_recovery_from_checkpoint_start_func(
/* Set the flag to publish that we are doing startup scan. */
recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
while (group) {
- old_scanned_lsn = recv_sys->scanned_lsn;
+#ifdef UNIV_LOG_ARCHIVE
+ lsn_t old_scanned_lsn = recv_sys->scanned_lsn;
+#endif /* UNIV_LOG_ARCHIVE */
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
group->scanned_lsn = group_scanned_lsn;
+#ifdef UNIV_LOG_ARCHIVE
if (old_scanned_lsn < group_scanned_lsn) {
/* We found a more up-to-date group */
up_to_date_group = group;
}
-#ifdef UNIV_LOG_ARCHIVE
if ((type == LOG_ARCHIVE)
&& (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
@@ -3104,70 +3224,73 @@ recv_recovery_from_checkpoint_start_func(
|| checkpoint_lsn != min_flushed_lsn) {
if (checkpoint_lsn < max_flushed_lsn) {
- fprintf(stderr,
- "InnoDB: #########################"
- "#################################\n"
- "InnoDB: "
- "WARNING!\n"
- "InnoDB: The log sequence number"
- " in ibdata files is higher\n"
- "InnoDB: than the log sequence number"
- " in the ib_logfiles! Are you sure\n"
- "InnoDB: you are using the right"
- " ib_logfiles to start up"
- " the database?\n"
- "InnoDB: Log sequence number in"
- " ib_logfiles is " LSN_PF ", log\n"
- "InnoDB: sequence numbers stamped"
- " to ibdata file headers are between\n"
- "InnoDB: " LSN_PF " and " LSN_PF ".\n"
- "InnoDB: #########################"
- "#################################\n",
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "The log sequence number "
+ "in the ibdata files is higher "
+ "than the log sequence number "
+ "in the ib_logfiles! Are you sure "
+ "you are using the right "
+ "ib_logfiles to start up the database. "
+ "Log sequence number in the "
+ "ib_logfiles is " LSN_PF ", log"
+ "sequence numbers stamped "
+ "to ibdata file headers are between "
+ "" LSN_PF " and " LSN_PF ".",
checkpoint_lsn,
min_flushed_lsn,
max_flushed_lsn);
}
if (!recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: The log sequence number"
- " in ibdata files does not match\n"
- "InnoDB: the log sequence number"
- " in the ib_logfiles!\n");
- recv_init_crash_recovery();
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The log sequence numbers "
+ LSN_PF " and " LSN_PF
+ " in ibdata files do not match"
+ " the log sequence number "
+ LSN_PF
+ " in the ib_logfiles!",
+ min_flushed_lsn,
+ max_flushed_lsn,
+ checkpoint_lsn);
+
+ if (!srv_read_only_mode) {
+ recv_init_crash_recovery();
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't initiate database "
+ "recovery, running "
+ "in read-only-mode.");
+ return(DB_READ_ONLY);
+ }
}
}
- if (!recv_needed_recovery) {
- /* Init the doublewrite buffer memory structure */
- buf_dblwr_init_or_restore_pages(FALSE);
+ if (!srv_read_only_mode) {
+ if (recv_needed_recovery) {
+ /* Spawn the background thread to
+ flush dirty pages from the buffer
+ pools. */
+ recv_writer_thread_handle =
+ os_thread_create(
+ recv_writer_thread, 0, 0);
+ } else {
+ /* Init the doublewrite buffer memory
+ structure */
+ buf_dblwr_init_or_restore_pages(FALSE);
+ }
}
}
/* We currently have only one log group */
- if (group_scanned_lsn < checkpoint_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to\n"
- "InnoDB: " LSN_PF ", but a checkpoint was at "
- LSN_PF ".\n"
- "InnoDB: It is possible that"
- " the database is now corrupt!\n",
- group_scanned_lsn,
- checkpoint_lsn);
- }
-
- if (group_scanned_lsn < recv_max_page_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to " LSN_PF "\n"
- "InnoDB: but a database page a had an lsn " LSN_PF "."
- " It is possible that the\n"
- "InnoDB: database is now corrupt!\n",
- group_scanned_lsn,
- recv_max_page_lsn);
+ if (group_scanned_lsn < checkpoint_lsn
+ || group_scanned_lsn < recv_max_page_lsn) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "We scanned the log up to "
+ LSN_PF ". A checkpoint was at " LSN_PF
+ " and the maximum LSN on a database page was " LSN_PF
+ ". It is possible that the database is now corrupt!",
+ group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
}
if (recv_sys->recovered_lsn < checkpoint_lsn) {
@@ -3179,7 +3302,10 @@ recv_recovery_from_checkpoint_start_func(
return(DB_SUCCESS);
}
- ut_error;
+ /* No harm in trying to do RO access. */
+ if (!srv_read_only_mode) {
+ ut_error;
+ }
return(DB_ERROR);
}
@@ -3192,9 +3318,11 @@ recv_recovery_from_checkpoint_start_func(
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
recv_synchronize_groups(up_to_date_group);
+#else /* UNIV_LOG_ARCHIVE */
+ recv_synchronize_groups();
+#endif /* UNIV_LOG_ARCHIVE */
if (!recv_needed_recovery) {
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
@@ -3225,13 +3353,13 @@ recv_recovery_from_checkpoint_start_func(
}
#endif /* UNIV_LOG_ARCHIVE */
- mutex_enter(&(recv_sys->mutex));
+ mutex_enter(&recv_sys->mutex);
recv_sys->apply_log_recs = TRUE;
- mutex_exit(&(recv_sys->mutex));
+ mutex_exit(&recv_sys->mutex);
- mutex_exit(&(log_sys->mutex));
+ mutex_exit(&log_sys->mutex);
recv_lsn_checks_on = TRUE;
@@ -3287,10 +3415,40 @@ recv_recovery_from_checkpoint_finish(void)
"InnoDB: a backup!\n");
}
- /* Free the resources of the recovery system */
+ /* Make sure that the recv_writer thread is done. This is
+ required because it grabs various mutexes and we want to
+ ensure that when we enable sync_order_checks there is no
+ mutex currently held by any thread. */
+ mutex_enter(&recv_sys->writer_mutex);
+ /* Free the resources of the recovery system */
recv_recovery_on = FALSE;
+ /* By acquring the mutex we ensure that the recv_writer thread
+ won't trigger any more LRU batchtes. Now wait for currently
+ in progress batches to finish. */
+ buf_flush_wait_LRU_batch_end();
+
+ mutex_exit(&recv_sys->writer_mutex);
+
+ ulint count = 0;
+ while (recv_writer_thread_active) {
+ ++count;
+ os_thread_sleep(100000);
+ if (srv_print_verbose_log && count > 600) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for recv_writer to "
+ "finish flushing of buffer pool");
+ count = 0;
+ }
+ }
+
+#ifdef __WIN__
+ if (recv_writer_thread_handle) {
+ CloseHandle(recv_writer_thread_handle);
+ }
+#endif /* __WIN__ */
+
#ifndef UNIV_LOG_DEBUG
recv_sys_debug_free();
#endif
@@ -3310,20 +3468,22 @@ void
recv_recovery_rollback_active(void)
/*===============================*/
{
- int i;
-
#ifdef UNIV_SYNC_DEBUG
/* Wait for a while so that created threads have time to suspend
themselves before we switch the latching order checks on */
os_thread_sleep(1000000);
+ ut_ad(!recv_writer_thread_active);
+
/* Switch latching order checks on in sync0sync.cc */
sync_order_checks_on = TRUE;
#endif
/* We can't start any (DDL) transactions if UNDO logging
has been disabled, additionally disable ROLLBACK of recovered
user transactions. */
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
+ && !srv_read_only_mode) {
+
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/* Drop temporary tables. */
@@ -3338,7 +3498,7 @@ recv_recovery_rollback_active(void)
/* Rollback the uncommitted transactions which have no user
session */
- os_thread_create(trx_rollback_or_clean_all_recovered, &i, NULL);
+ os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
}
}
@@ -3348,18 +3508,18 @@ UNIV_INTERN
void
recv_reset_logs(
/*============*/
- lsn_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created)/*!< in: TRUE if resetting logs
+ ibool new_logs_created,/*!< in: TRUE if resetting logs
is done at the log creation;
FALSE if it is done after
archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t lsn) /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
{
log_group_t* group;
@@ -3375,12 +3535,12 @@ recv_reset_logs(
#ifdef UNIV_LOG_ARCHIVE
group->archived_file_no = arch_log_no;
group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
if (!new_logs_created) {
recv_truncate_group(group, group->lsn, group->lsn,
group->lsn, group->lsn);
}
+#endif /* UNIV_LOG_ARCHIVE */
group = UT_LIST_GET_NEXT(log_groups, group);
}
@@ -3805,7 +3965,7 @@ recv_recovery_from_archive_start(
recv_apply_hashed_log_recs(FALSE);
- recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
+ recv_reset_logs(0, FALSE, recv_sys->recovered_lsn);
}
mutex_exit(&(log_sys->mutex));
diff --git a/storage/innobase/mem/mem0dbg.cc b/storage/innobase/mem/mem0dbg.cc
index 83e14ad6071..308c2979551 100644
--- a/storage/innobase/mem/mem0dbg.cc
+++ b/storage/innobase/mem/mem0dbg.cc
@@ -30,7 +30,7 @@ Created 6/9/1994 Heikki Tuuri
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables below. */
-UNIV_INTERN mutex_t mem_hash_mutex;
+UNIV_INTERN ib_mutex_t mem_hash_mutex;
#ifdef UNIV_PFS_MUTEX
/* Key to register mem_hash_mutex with performance schema */
@@ -58,8 +58,7 @@ static ibool mem_hash_initialized = FALSE;
/* The node of the list containing currently allocated memory heaps */
-typedef struct mem_hash_node_struct mem_hash_node_t;
-struct mem_hash_node_struct {
+struct mem_hash_node_t {
UT_LIST_NODE_T(mem_hash_node_t)
list; /*!< hash list node */
mem_heap_t* heap; /*!< memory heap */
diff --git a/storage/innobase/mem/mem0pool.cc b/storage/innobase/mem/mem0pool.cc
index 2135926a26f..fe9a84d21fa 100644
--- a/storage/innobase/mem/mem0pool.cc
+++ b/storage/innobase/mem/mem0pool.cc
@@ -100,12 +100,12 @@ pool, and after that its locks will grow into the buffer pool. */
/** Data structure for a memory pool. The space is allocated using the buddy
algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_struct{
+struct mem_pool_t{
byte* buf; /*!< memory pool */
ulint size; /*!< memory common pool size */
ulint reserved; /*!< amount of currently allocated
memory */
- mutex_t mutex; /*!< mutex protecting this struct */
+ ib_mutex_t mutex; /*!< mutex protecting this struct */
UT_LIST_BASE_NODE_T(mem_area_t)
free_list[64]; /*!< lists of free memory areas: an
area is put to the list whose number
@@ -116,7 +116,7 @@ struct mem_pool_struct{
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex in mem_pool_struct with performance schema */
+/* Key to register mutex in mem_pool_t with performance schema */
UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
#endif /* UNIV_PFS_MUTEX */
diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc
index d549de8802e..5335cb4c9ef 100644
--- a/storage/innobase/mtr/mtr0log.cc
+++ b/storage/innobase/mtr/mtr0log.cc
@@ -240,8 +240,8 @@ mlog_parse_nbytes(
}
/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
UNIV_INTERN
void
mlog_write_ulint(
@@ -251,8 +251,6 @@ mlog_write_ulint(
byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- byte* log_ptr;
-
switch (type) {
case MLOG_1BYTE:
mach_write_to_1(ptr, val);
@@ -267,27 +265,29 @@ mlog_write_ulint(
ut_error;
}
- log_ptr = mlog_open(mtr, 11 + 2 + 5);
+ if (mtr != 0) {
+ byte* log_ptr = mlog_open(mtr, 11 + 2 + 5);
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
+ /* If no logging is requested, we may return now */
- return;
- }
+ if (log_ptr != 0) {
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_fast(
+ ptr, type, log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
+ mach_write_to_2(log_ptr, page_offset(ptr));
+ log_ptr += 2;
- log_ptr += mach_write_compressed(log_ptr, val);
+ log_ptr += mach_write_compressed(log_ptr, val);
- mlog_close(mtr, log_ptr);
+ mlog_close(mtr, log_ptr);
+ }
+ }
}
/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
UNIV_INTERN
void
mlog_write_ull(
@@ -296,29 +296,25 @@ mlog_write_ull(
ib_uint64_t val, /*!< in: value to write */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- byte* log_ptr;
-
- ut_ad(ptr && mtr);
-
mach_write_to_8(ptr, val);
- log_ptr = mlog_open(mtr, 11 + 2 + 9);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
+ if (mtr != 0) {
+ byte* log_ptr = mlog_open(mtr, 11 + 2 + 9);
- return;
- }
+ /* If no logging is requested, we may return now */
+ if (log_ptr != 0) {
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
- log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_fast(
+ ptr, MLOG_8BYTES, log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
+ mach_write_to_2(log_ptr, page_offset(ptr));
+ log_ptr += 2;
- log_ptr += mach_ull_write_compressed(log_ptr, val);
+ log_ptr += mach_ull_write_compressed(log_ptr, val);
- mlog_close(mtr, log_ptr);
+ mlog_close(mtr, log_ptr);
+ }
+ }
}
#ifndef UNIV_HOTBACKUP
@@ -439,12 +435,13 @@ UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size) /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size) /*!< in: requested buffer size in bytes
+ (if 0, calls mlog_close() and
+ returns NULL) */
{
byte* log_ptr;
const byte* log_start;
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 4832e8c7710..10b4686b720 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -142,9 +142,9 @@ mtr_memo_slot_note_modification(
mtr_t* mtr, /*!< in: mtr */
mtr_memo_slot_t* slot) /*!< in: memo slot */
{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->modifications);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) {
buf_block_t* block = (buf_block_t*) slot->object;
@@ -170,7 +170,7 @@ mtr_memo_note_modifications(
dyn_array_t* memo;
ulint offset;
- ut_ad(mtr);
+ ut_ad(!srv_read_only_mode);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
commit */
@@ -191,19 +191,51 @@ mtr_memo_note_modifications(
}
/************************************************************//**
+Append the dirty pages to the flush list. */
+static
+void
+mtr_add_dirtied_pages_to_flush_list(
+/*================================*/
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* No need to acquire log_flush_order_mutex if this mtr has
+ not dirtied a clean page. log_flush_order_mutex is used to
+ ensure ordered insertions in the flush_list. We need to
+ insert in the flush_list iff the page in question was clean
+ before modifications. */
+ if (mtr->made_dirty) {
+ log_flush_order_mutex_enter();
+ }
+
+ /* It is now safe to release the log mutex because the
+ flush_order mutex will ensure that we are the first one
+ to insert into the flush list. */
+ log_release();
+
+ if (mtr->modifications) {
+ mtr_memo_note_modifications(mtr);
+ }
+
+ if (mtr->made_dirty) {
+ log_flush_order_mutex_exit();
+ }
+}
+
+/************************************************************//**
Writes the contents of a mini-transaction log, if any, to the database log. */
static
void
mtr_log_reserve_and_write(
/*======================*/
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
{
dyn_array_t* mlog;
- dyn_block_t* block;
ulint data_size;
byte* first_data;
- ut_ad(mtr);
+ ut_ad(!srv_read_only_mode);
mlog = &(mtr->log);
@@ -217,14 +249,21 @@ mtr_log_reserve_and_write(
}
if (mlog->heap == NULL) {
+ ulint len;
+
+ len = mtr->log_mode != MTR_LOG_NO_REDO
+ ? dyn_block_get_used(mlog) : 0;
+
mtr->end_lsn = log_reserve_and_write_fast(
- first_data, dyn_block_get_used(mlog),
- &mtr->start_lsn);
+ first_data, len, &mtr->start_lsn);
+
if (mtr->end_lsn) {
/* Success. We have the log mutex.
Add pages to flush list and exit */
- goto func_exit;
+ mtr_add_dirtied_pages_to_flush_list(mtr);
+
+ return;
}
}
@@ -235,43 +274,24 @@ mtr_log_reserve_and_write(
if (mtr->log_mode == MTR_LOG_ALL) {
- block = mlog;
+ for (dyn_block_t* block = mlog;
+ block != 0;
+ block = dyn_array_get_next_block(mlog, block)) {
- while (block != NULL) {
- log_write_low(dyn_block_get_data(block),
- dyn_block_get_used(block));
- block = dyn_array_get_next_block(mlog, block);
+ log_write_low(
+ dyn_block_get_data(block),
+ dyn_block_get_used(block));
}
+
} else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE);
+ ut_ad(mtr->log_mode == MTR_LOG_NONE
+ || mtr->log_mode == MTR_LOG_NO_REDO);
/* Do nothing */
}
mtr->end_lsn = log_close();
-func_exit:
-
- /* No need to acquire log_flush_order_mutex if this mtr has
- not dirtied a clean page. log_flush_order_mutex is used to
- ensure ordered insertions in the flush_list. We need to
- insert in the flush_list iff the page in question was clean
- before modifications. */
- if (mtr->made_dirty) {
- log_flush_order_mutex_enter();
- }
-
- /* It is now safe to release the log mutex because the
- flush_order mutex will ensure that we are the first one
- to insert into the flush list. */
- log_release();
-
- if (mtr->modifications) {
- mtr_memo_note_modifications(mtr);
- }
-
- if (mtr->made_dirty) {
- log_flush_order_mutex_exit();
- }
+ mtr_add_dirtied_pages_to_flush_list(mtr);
}
#endif /* !UNIV_HOTBACKUP */
@@ -294,6 +314,7 @@ mtr_commit(
ut_ad(!recv_no_log_write);
if (mtr->modifications && mtr->n_log_recs) {
+ ut_ad(!srv_read_only_mode);
mtr_log_reserve_and_write(mtr);
}
@@ -376,14 +397,8 @@ mtr_read_ulint(
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- if (type == MLOG_1BYTE) {
- return(mach_read_from_1(ptr));
- } else if (type == MLOG_2BYTES) {
- return(mach_read_from_2(ptr));
- } else {
- ut_ad(type == MLOG_4BYTES);
- return(mach_read_from_4(ptr));
- }
+
+ return(mach_read_ulint(ptr, type));
}
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 62cde1cf728..5f0dc0d3667 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -60,24 +60,29 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif
+/** Insert buffer segment id */
+static const ulint IO_IBUF_SEGMENT = 0;
+
+/** Log segment id */
+static const ulint IO_LOG_SEGMENT = 1;
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
#ifndef __WIN__
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask
- = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
#else
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = 0;
-#endif
+UNIV_INTERN ulint os_innodb_umask = 0;
+#endif /* __WIN__ */
#ifndef UNIV_HOTBACKUP
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar */
#define OS_FILE_N_SEEK_MUTEXES 16
-UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
@@ -147,10 +152,7 @@ UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
#endif /* UNIV_PFS_IO */
/** The asynchronous i/o array slot structure */
-typedef struct os_aio_slot_struct os_aio_slot_t;
-
-/** The asynchronous i/o array slot structure */
-struct os_aio_slot_struct{
+struct os_aio_slot_t{
ibool is_read; /*!< TRUE if a read operation */
ulint pos; /*!< index of the slot in the aio
array */
@@ -182,15 +184,12 @@ struct os_aio_slot_struct{
struct iocb control; /* Linux control block for aio */
int n_bytes; /* bytes written/read. */
int ret; /* AIO return code */
-#endif
+#endif /* WIN_ASYNC_IO */
};
/** The asynchronous i/o array structure */
-typedef struct os_aio_array_struct os_aio_array_t;
-
-/** The asynchronous i/o array structure */
-struct os_aio_array_struct{
- os_mutex_t mutex; /*!< the mutex protecting the aio array */
+struct os_aio_array_t{
+ os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */
os_event_t not_full;
/*!< The event which is set to the
signaled state when there is space in
@@ -223,7 +222,7 @@ struct os_aio_array_struct{
order. This can be used in
WaitForMultipleObjects; used only in
Windows */
-#endif
+#endif /* __WIN__ */
#if defined(LINUX_NATIVE_AIO)
io_context_t* aio_ctx;
@@ -235,7 +234,7 @@ struct os_aio_array_struct{
There is one such event for each
possible pending IO. The size of the
array is equal to n_slots. */
-#endif
+#endif /* LINUX_NATIV_AIO */
};
#if defined(LINUX_NATIVE_AIO)
@@ -283,7 +282,7 @@ UNIV_INTERN ibool os_has_said_disk_full = FALSE;
#if !defined(UNIV_HOTBACKUP) \
&& (!defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8)
/** The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
+static os_ib_mutex_t os_file_count_mutex;
#endif /* !UNIV_HOTBACKUP && (!HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8) */
/** Number of pending os_file_pread() operations */
@@ -336,7 +335,7 @@ ulint
os_get_os_version(void)
/*===================*/
{
- OSVERSIONINFO os_info;
+ OSVERSIONINFO os_info;
os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
@@ -350,15 +349,15 @@ os_get_os_version(void)
switch (os_info.dwMajorVersion) {
case 3:
case 4:
- return OS_WINNT;
+ return(OS_WINNT);
case 5:
- return (os_info.dwMinorVersion == 0) ? OS_WIN2000
- : OS_WINXP;
+ return (os_info.dwMinorVersion == 0)
+ ? OS_WIN2000 : OS_WINXP;
case 6:
- return (os_info.dwMinorVersion == 0) ? OS_WINVISTA
- : OS_WIN7;
+ return (os_info.dwMinorVersion == 0)
+ ? OS_WINVISTA : OS_WIN7;
default:
- return OS_WIN7;
+ return(OS_WIN7);
}
} else {
ut_error;
@@ -377,16 +376,17 @@ static
ulint
os_file_get_last_error_low(
/*=======================*/
- ibool report_all_errors, /*!< in: TRUE if we want an error
+ bool report_all_errors, /*!< in: TRUE if we want an error
message printed of all errors */
- ibool on_error_silent) /*!< in: TRUE then don't print any
+ bool on_error_silent) /*!< in: TRUE then don't print any
diagnostic to the log */
{
- ulint err;
-
#ifdef __WIN__
- err = (ulint) GetLastError();
+ ulint err = (ulint) GetLastError();
+ if (err == ERROR_SUCCESS) {
+ return(0);
+ }
if (report_all_errors
|| (!on_error_silent
@@ -469,15 +469,18 @@ os_file_get_last_error_low(
return(100 + err);
}
#else
- err = (ulint) errno;
+ int err = errno;
+ if (err == 0) {
+ return(0);
+ }
if (report_all_errors
|| (err != ENOSPC && err != EEXIST && !on_error_silent)) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
+ " InnoDB: Operating system error number %d"
+ " in a file operation.\n", err);
if (err == ENOENT) {
fprintf(stderr,
@@ -497,11 +500,11 @@ os_file_get_last_error_low(
" the access rights to\n"
"InnoDB: the directory.\n");
} else {
- if (strerror((int) err) != NULL) {
+ if (strerror(err) != NULL) {
fprintf(stderr,
- "InnoDB: Error number %lu"
+ "InnoDB: Error number %d"
" means '%s'.\n",
- err, strerror((int) err));
+ err, strerror(err));
}
@@ -552,10 +555,10 @@ UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- ibool report_all_errors) /*!< in: TRUE if we want an error
+ bool report_all_errors) /*!< in: TRUE if we want an error
message printed of all errors */
{
- return(os_file_get_last_error_low(report_all_errors, FALSE));
+ return(os_file_get_last_error_low(report_all_errors, false));
}
/****************************************************************//**
@@ -577,7 +580,7 @@ os_file_handle_error_cond_exit(
{
ulint err;
- err = os_file_get_last_error_low(FALSE, on_error_silent);
+ err = os_file_get_last_error_low(false, on_error_silent);
switch (err) {
case OS_FILE_DISK_FULL:
@@ -645,7 +648,8 @@ os_file_handle_error_cond_exit(
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: File operation call: "
- "'%s'.\n", operation);
+ "'%s' returned OS error " ULINTPF ".\n",
+ operation, err);
}
if (should_exit) {
@@ -654,7 +658,9 @@ os_file_handle_error_cond_exit(
"operation.\n");
fflush(stderr);
- ut_error;
+
+ ut_ad(0); /* Report call stack, etc only in debug code. */
+ exit(1);
}
}
@@ -712,19 +718,23 @@ os_file_lock(
const char* name) /*!< in: file name */
{
struct flock lk;
+
+ ut_ad(!srv_read_only_mode);
+
lk.l_type = F_WRLCK;
lk.l_whence = SEEK_SET;
lk.l_start = lk.l_len = 0;
+
if (fcntl(fd, F_SETLK, &lk) == -1) {
- fprintf(stderr,
- "InnoDB: Unable to lock %s, error: %d\n", name, errno);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to lock %s, error: %d", name, errno);
if (errno == EAGAIN || errno == EACCES) {
- fprintf(stderr,
- "InnoDB: Check that you do not already have"
- " another mysqld process\n"
- "InnoDB: using the same InnoDB data"
- " or log files.\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Check that you do not already have "
+ "another mysqld process using the "
+ "same InnoDB data or log files.");
}
return(-1);
@@ -742,13 +752,11 @@ void
os_io_init_simple(void)
/*===================*/
{
- ulint i;
-
#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
os_file_count_mutex = os_mutex_create();
#endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8 */
- for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+ for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create();
}
}
@@ -765,6 +773,8 @@ os_file_create_tmpfile(void)
FILE* file = NULL;
int fd = innobase_mysql_tmpfile();
+ ut_ad(!srv_read_only_mode);
+
if (fd >= 0) {
file = fdopen(fd, "w+b");
}
@@ -840,7 +850,7 @@ os_file_opendir(
}
return(dir);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -874,7 +884,7 @@ os_file_closedir(
}
return(ret);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -1054,10 +1064,12 @@ next_file:
}
/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
+This function attempts to create a directory named pathname. The new
+directory gets default permissions. On Unix the permissions are
+(0770 & ~umask). If the directory exists already, nothing is done and
+the call succeeds, unless the fail_if_exists arguments is true.
+If another error occurs, such as a permission error, this does not crash,
+but reports the error and returns FALSE.
@return TRUE if call succeeds, FALSE on error */
UNIV_INTERN
ibool
@@ -1075,13 +1087,14 @@ os_file_create_directory(
if (!(rcode != 0
|| (GetLastError() == ERROR_ALREADY_EXISTS
&& !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "CreateDirectory");
+
+ os_file_handle_error_no_exit(
+ pathname, "CreateDirectory", FALSE);
return(FALSE);
}
- return (TRUE);
+ return(TRUE);
#else
int rcode;
@@ -1089,13 +1102,13 @@ os_file_create_directory(
if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
/* failure */
- os_file_handle_error(pathname, "mkdir");
+ os_file_handle_error_no_exit(pathname, "mkdir", FALSE);
return(FALSE);
}
return (TRUE);
-#endif
+#endif /* __WIN__ */
}
/****************************************************************//**
@@ -1115,129 +1128,180 @@ os_file_create_simple_func(
OS_FILE_READ_WRITE */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
-#ifdef __WIN__
os_file_t file;
- DWORD create_flag;
+ ibool retry;
+
+#ifdef __WIN__
DWORD access;
+ DWORD create_flag;
DWORD attributes = 0;
- ibool retry;
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-try_again:
- ut_a(name);
if (create_mode == OS_FILE_OPEN) {
+
+ create_flag = OPEN_EXISTING;
+
+ } else if (srv_read_only_mode) {
+
create_flag = OPEN_EXISTING;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = CREATE_NEW;
+
} else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
+
+ ut_a(!srv_read_only_mode);
+
+ /* Create subdirs along the path if needed */
*success = os_file_create_subdirs_if_needed(name);
+
if (!*success) {
- ut_error;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create subdirectories '%s'",
+ name);
+
+ return((os_file_t) -1);
}
+
create_flag = CREATE_NEW;
create_mode = OS_FILE_CREATE;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
if (access_type == OS_FILE_READ_ONLY) {
access = GENERIC_READ;
+ } else if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "read only mode set. Unable to "
+ "open file '%s' in RW mode, trying RO mode", name);
+
+ access = GENERIC_READ;
+
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
} else {
- access = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file access type (%lu) for file '%s'",
+ access_type, name);
+
+ return((os_file_t) -1);
}
- file = CreateFile((LPCTSTR) name,
- access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- /* file can be read and written also
- by other processes */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
+ do {
+ /* Use default security attributes and no template file. */
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
+ file = CreateFile(
+ (LPCTSTR) name, access, FILE_SHARE_READ, NULL,
+ create_flag, attributes, NULL);
+
+ if (file == INVALID_HANDLE_VALUE) {
+
+ *success = FALSE;
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
+ retry = os_file_handle_error(
+ name, create_mode == OS_FILE_OPEN ?
+ "open" : "create");
+
+ } else {
+ *success = TRUE;
+ retry = false;
}
- } else {
- *success = TRUE;
- }
- return(file);
+ } while (retry);
+
#else /* __WIN__ */
- os_file_t file;
int create_flag;
- ibool retry;
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-try_again:
- ut_a(name);
-
if (create_mode == OS_FILE_OPEN) {
+
if (access_type == OS_FILE_READ_ONLY) {
create_flag = O_RDONLY;
+ } else if (srv_read_only_mode) {
+ create_flag = O_RDONLY;
} else {
create_flag = O_RDWR;
}
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
+
} else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
+
+ /* Create subdirs along the path if needed */
+
*success = os_file_create_subdirs_if_needed(name);
+
if (!*success) {
- return (-1);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create subdirectories '%s'",
+ name);
+
+ return((os_file_t) -1);
}
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
create_mode = OS_FILE_CREATE;
} else {
- create_flag = 0;
- ut_error;
- }
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
- if (file == -1) {
- *success = FALSE;
+ do {
+ file = ::open(name, create_flag, os_innodb_umask);
+
+ if (file == -1) {
+ *success = FALSE;
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
+ retry = os_file_handle_error(
+ name,
+ create_mode == OS_FILE_OPEN
+ ? "open" : "create");
+ } else {
+ *success = TRUE;
+ retry = false;
}
+
+ } while (retry);
+
#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+
*success = FALSE;
close(file);
file = -1;
-#endif
- } else {
- *success = TRUE;
}
+#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/****************************************************************//**
@@ -1259,12 +1323,13 @@ os_file_create_simple_no_error_handling_func(
used by a backup program reading the file */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
-#ifdef __WIN__
os_file_t file;
- DWORD create_flag;
+
+#ifdef __WIN__
DWORD access;
+ DWORD create_flag;
DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ DWORD share_mode = FILE_SHARE_READ;
ut_a(name);
@@ -1273,46 +1338,53 @@ os_file_create_simple_no_error_handling_func(
if (create_mode == OS_FILE_OPEN) {
create_flag = OPEN_EXISTING;
+ } else if (srv_read_only_mode) {
+ create_flag = OPEN_EXISTING;
} else if (create_mode == OS_FILE_CREATE) {
create_flag = CREATE_NEW;
} else {
- create_flag = 0;
- ut_error;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
if (access_type == OS_FILE_READ_ONLY) {
access = GENERIC_READ;
+ } else if (srv_read_only_mode) {
+ access = GENERIC_READ;
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+
+ ut_a(!srv_read_only_mode);
+
access = GENERIC_READ;
- share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /*!< A backup program has to give
- mysqld the maximum freedom to
- do what it likes with the
- file */
+
+ /*!< A backup program has to give mysqld the maximum
+ freedom to do what it likes with the file */
+
+ share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
} else {
- access = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file access type (%lu) for file '%s'",
+ access_type, name);
+
+ return((os_file_t) -1);
}
file = CreateFile((LPCTSTR) name,
access,
share_mode,
- NULL, /* default security attributes */
+ NULL, // Security attributes
create_flag,
attributes,
- NULL); /*!< no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
- } else {
- *success = TRUE;
- }
+ NULL); // No template file
- return(file);
+ *success = (file != INVALID_HANDLE_VALUE);
#else /* __WIN__ */
- os_file_t file;
int create_flag;
ut_a(name);
@@ -1321,40 +1393,59 @@ os_file_create_simple_no_error_handling_func(
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
if (create_mode == OS_FILE_OPEN) {
+
if (access_type == OS_FILE_READ_ONLY) {
+
+ create_flag = O_RDONLY;
+
+ } else if (srv_read_only_mode) {
+
create_flag = O_RDONLY;
+
} else {
+
+ ut_a(access_type == OS_FILE_READ_WRITE
+ || access_type == OS_FILE_READ_ALLOW_DELETE);
+
create_flag = O_RDWR;
}
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else {
- create_flag = 0;
- ut_error;
- }
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
} else {
- file = open(name, create_flag);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
- if (file == -1) {
- *success = FALSE;
+ file = ::open(name, create_flag, os_innodb_umask);
+
+ *success = file == -1 ? FALSE : TRUE;
+
#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+
*success = FALSE;
close(file);
file = -1;
-#endif
- } else {
- *success = TRUE;
+
}
+#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/****************************************************************//**
@@ -1364,42 +1455,41 @@ void
os_file_set_nocache(
/*================*/
int fd /*!< in: file descriptor to alter */
- __attribute__((unused)),
- const char* file_name /*!< in: used in the diagnostic message */
- __attribute__((unused)),
+ __attribute__((unused)),
+ const char* file_name /*!< in: used in the diagnostic
+ message */
+ __attribute__((unused)),
const char* operation_name __attribute__((unused)))
- /*!< in: "open" or "create"; used in the
- diagnostic message */
+ /*!< in: "open" or "create"; used
+ in the diagnostic message */
{
/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save;
- errno_save = (int) errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set DIRECTIO_ON "
- "on file %s: %s: %s, continuing anyway\n",
+ int errno_save = errno;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to set DIRECTIO_ON on file %s: %s: %s, "
+ "continuing anyway.",
file_name, operation_name, strerror(errno_save));
}
#elif defined(O_DIRECT)
if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save;
- errno_save = (int) errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set O_DIRECT "
- "on file %s: %s: %s, continuing anyway\n",
+ int errno_save = errno;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to set O_DIRECT on file %s: %s: %s, "
+ "continuing anyway",
file_name, operation_name, strerror(errno_save));
+
if (errno_save == EINVAL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: O_DIRECT is known to result in "
- "'Invalid argument' on Linux on tmpfs, "
- "see MySQL Bug#26662\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "O_DIRECT is known to result in 'Invalid "
+ "argument' on Linux on tmpfs, see MySQL "
+ "Bug#26662");
}
}
-#endif
+#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
}
/****************************************************************//**
@@ -1425,138 +1515,155 @@ os_file_create_func(
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
+ os_file_t file;
+ ibool retry;
ibool on_error_no_exit;
ibool on_error_silent;
#ifdef __WIN__
- os_file_t file;
- DWORD share_mode = FILE_SHARE_READ;
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ SetLastError(ERROR_DISK_FULL);
+ return((os_file_t) -1);
+ );
+#else /* __WIN__ */
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ errno = ENOSPC;
+ return((os_file_t) -1);
+ );
+#endif /* __WIN__ */
+
+#ifdef __WIN__
DWORD create_flag;
- DWORD attributes;
- ibool retry;
+ DWORD share_mode = FILE_SHARE_READ;
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
? TRUE : FALSE;
+
on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
? TRUE : FALSE;
create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
create_mode &= ~OS_FILE_ON_ERROR_SILENT;
+ if (create_mode == OS_FILE_OPEN_RAW) {
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- SetLastError(ERROR_DISK_FULL);
- return((os_file_t) -1);
- );
-try_again:
- ut_a(name);
+ ut_a(!srv_read_only_mode);
- if (create_mode == OS_FILE_OPEN_RAW) {
create_flag = OPEN_EXISTING;
- share_mode = FILE_SHARE_WRITE;
+
+ /* On Windows Physical devices require admin privileges and
+ have to have the write-share mode set. See the remarks
+ section for the CreateFile() function documentation in MSDN. */
+
+ share_mode |= FILE_SHARE_WRITE;
+
} else if (create_mode == OS_FILE_OPEN
|| create_mode == OS_FILE_OPEN_RETRY) {
+
create_flag = OPEN_EXISTING;
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = OPEN_EXISTING;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = CREATE_NEW;
+
} else if (create_mode == OS_FILE_OVERWRITE) {
+
create_flag = CREATE_ALWAYS;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
+ DWORD attributes = 0;
+
+#ifdef UNIV_HOTBACKUP
+ attributes |= FILE_FLAG_NO_BUFFERING;
+#else
if (purpose == OS_FILE_AIO) {
+
+#ifdef WIN_ASYNC_IO
/* If specified, use asynchronous (overlapped) io and no
buffering of writes in the OS */
- attributes = 0;
-#ifdef WIN_ASYNC_IO
+
if (srv_use_native_aio) {
- attributes = attributes | FILE_FLAG_OVERLAPPED;
+ attributes |= FILE_FLAG_OVERLAPPED;
}
-#endif
-#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_NON_BUFFERED_IO */
+#endif /* WIN_ASYNC_IO */
+
} else if (purpose == OS_FILE_NORMAL) {
- attributes = 0;
+ /* Use default setting. */
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown purpose flag (%lu) while opening file '%s'",
+ purpose, name);
+
+ return((os_file_t)(-1));
+ }
+
#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
+ // TODO: Create a bug, this looks wrong. The flush log
+ // parameter is dynamic.
+ if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
+
+ /* Do not use unbuffered i/o for the log files because
+ value 2 denotes that we do not flush the log at every
+ commit, but only once per second */
+
+ } else if (srv_win_file_flush_method == SRV_WIN_IO_UNBUFFERED) {
+
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
#endif /* UNIV_NON_BUFFERED_IO */
- } else {
- attributes = 0;
- ut_error;
+
+#endif /* UNIV_HOTBACKUP */
+ DWORD access = GENERIC_READ;
+
+ if (!srv_read_only_mode) {
+ access |= GENERIC_WRITE;
}
- file = CreateFile((LPCTSTR) name,
- GENERIC_READ | GENERIC_WRITE, /* read and write
- access */
- share_mode, /* File can be read also by other
- processes; we must give the read
- permission because of ibbackup. We do
- not give the write permission to
- others because if one would succeed to
- start 2 instances of mysqld on the
- SAME files, that could cause severe
- database corruption! When opening
- raw disk partitions, Microsoft manuals
- say that we must give also the write
- permission. */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
+ do {
+ /* Use default security attributes and no template file. */
+ file = CreateFile(
+ (LPCTSTR) name, access, share_mode, NULL,
+ create_flag, attributes, NULL);
- if (file == INVALID_HANDLE_VALUE) {
- const char* operation;
+ if (file == INVALID_HANDLE_VALUE) {
+ const char* operation;
- operation = create_mode == OS_FILE_CREATE ? "create" : "open";
+ operation = (create_mode == OS_FILE_CREATE
+ && !srv_read_only_mode)
+ ? "create" : "open";
- *success = FALSE;
+ *success = FALSE;
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ if (on_error_no_exit) {
+ retry = os_file_handle_error_no_exit(
+ name, operation, on_error_silent);
+ } else {
+ retry = os_file_handle_error(name, operation);
+ }
} else {
- retry = os_file_handle_error(name, operation);
+ *success = TRUE;
+ retry = FALSE;
}
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
+ } while (retry);
- return(file);
#else /* __WIN__ */
- os_file_t file;
int create_flag;
- ibool retry;
const char* mode_str = NULL;
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
@@ -1567,28 +1674,36 @@ try_again:
create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
create_mode &= ~OS_FILE_ON_ERROR_SILENT;
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- errno = ENOSPC;
- return((os_file_t) -1);
- );
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
+ if (create_mode == OS_FILE_OPEN
+ || create_mode == OS_FILE_OPEN_RAW
|| create_mode == OS_FILE_OPEN_RETRY) {
+
+ mode_str = "OPEN";
+
+ create_flag = srv_read_only_mode ? O_RDONLY : O_RDWR;
+
+ } else if (srv_read_only_mode) {
+
mode_str = "OPEN";
- create_flag = O_RDWR;
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
mode_str = "CREATE";
create_flag = O_RDWR | O_CREAT | O_EXCL;
+
} else if (create_mode == OS_FILE_OVERWRITE) {
+
mode_str = "OVERWRITE";
create_flag = O_RDWR | O_CREAT | O_TRUNC;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
@@ -1598,69 +1713,75 @@ try_again:
/* We let O_SYNC only affect log files; note that we map O_DSYNC to
O_SYNC because the datasync options seemed to corrupt files in 2001
in both Linux and Solaris */
- if (type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-# if 0
- fprintf(stderr, "Using O_SYNC for file %s\n", name);
-# endif
+ if (!srv_read_only_mode
+ && type == OS_LOG_FILE
+ && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- create_flag = create_flag | O_SYNC;
+ create_flag |= O_SYNC;
}
#endif /* O_SYNC */
- file = open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- const char* operation;
+ do {
+ file = ::open(name, create_flag, os_innodb_umask);
- operation = create_mode == OS_FILE_CREATE ? "create" : "open";
+ if (file == -1) {
+ const char* operation;
- *success = FALSE;
+ operation = (create_mode == OS_FILE_CREATE
+ && !srv_read_only_mode)
+ ? "create" : "open";
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
- } else {
- retry = os_file_handle_error(name, operation);
- }
+ *success = FALSE;
- if (retry) {
- goto try_again;
+ if (on_error_no_exit) {
+ retry = os_file_handle_error_no_exit(
+ name, operation, on_error_silent);
+ } else {
+ retry = os_file_handle_error(name, operation);
+ }
} else {
- return(file /* -1 */);
+ *success = TRUE;
+ retry = false;
}
- }
- /* else */
- *success = TRUE;
+ } while (retry);
/* We disable OS caching (O_DIRECT) only on data files */
- if (type != OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
+
+ if (!srv_read_only_mode
+ && *success
+ && type != OS_LOG_FILE
+ && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
+ || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
- if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && create_mode != OS_FILE_OPEN_RAW
+ && os_file_lock(file, name)) {
if (create_mode == OS_FILE_OPEN_RETRY) {
- int i;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Retrying to lock"
- " the first data file\n",
- stderr);
- for (i = 0; i < 100; i++) {
+
+ ut_a(!srv_read_only_mode);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Retrying to lock the first data file");
+
+ for (int i = 0; i < 100; i++) {
os_thread_sleep(1000000);
+
if (!os_file_lock(file, name)) {
*success = TRUE;
return(file);
}
}
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to open the first data file\n",
- stderr);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Unable to open the first data file");
}
*success = FALSE;
@@ -1669,22 +1790,23 @@ try_again:
}
#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/***********************************************************************//**
Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete_if_exists(
/*=====================*/
const char* name) /*!< in: file path as a null-terminated
string */
{
#ifdef __WIN__
- BOOL ret;
+ bool ret;
ulint count = 0;
loop:
/* In Windows, deleting an .ibd file may fail if ibbackup is copying
@@ -1693,31 +1815,30 @@ loop:
ret = DeleteFile((LPCTSTR) name);
if (ret) {
- return(TRUE);
+ return(true);
}
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
+ DWORD lasterr = GetLastError();
+ if (lasterr == ERROR_FILE_NOT_FOUND
+ || lasterr == ERROR_PATH_NOT_FOUND) {
/* the file does not exist, this not an error */
- return(TRUE);
+ return(true);
}
count++;
if (count > 100 && 0 == (count % 10)) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running ibbackup"
- " to back up the file?\n", name);
+ os_file_get_last_error(true); /* print error information */
- os_file_get_last_error(TRUE); /* print error information */
+ ib_logf(IB_LOG_LEVEL_WARN, "Delete of file %s failed.", name);
}
os_thread_sleep(1000000); /* sleep for a second */
if (count > 2000) {
- return(FALSE);
+ return(false);
}
goto loop;
@@ -1729,18 +1850,18 @@ loop:
if (ret != 0 && errno != ENOENT) {
os_file_handle_error_no_exit(name, "delete", FALSE);
- return(FALSE);
+ return(false);
}
- return(TRUE);
-#endif
+ return(true);
+#endif /* __WIN__ */
}
/***********************************************************************//**
Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete(
/*===========*/
const char* name) /*!< in: file path as a null-terminated
@@ -1756,32 +1877,32 @@ loop:
ret = DeleteFile((LPCTSTR) name);
if (ret) {
- return(TRUE);
+ return(true);
}
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
/* If the file does not exist, we classify this as a 'mild'
error and return */
- return(FALSE);
+ return(false);
}
count++;
if (count > 100 && 0 == (count % 10)) {
+ os_file_get_last_error(true); /* print error information */
+
fprintf(stderr,
"InnoDB: Warning: cannot delete file %s\n"
"InnoDB: Are you running ibbackup"
" to back up the file?\n", name);
-
- os_file_get_last_error(TRUE); /* print error information */
}
os_thread_sleep(1000000); /* sleep for a second */
if (count > 2000) {
- return(FALSE);
+ return(false);
}
goto loop;
@@ -1793,10 +1914,10 @@ loop:
if (ret != 0) {
os_file_handle_error_no_exit(name, "delete", FALSE);
- return(FALSE);
+ return(false);
}
- return(TRUE);
+ return(true);
#endif
}
@@ -1813,6 +1934,19 @@ os_file_rename_func(
string */
const char* newpath)/*!< in: new file path */
{
+#ifdef UNIV_DEBUG
+ os_file_type_t type;
+ ibool exists;
+
+ /* New path must not exist. */
+ ut_ad(os_file_status(newpath, &exists, &type));
+ ut_ad(!exists);
+
+ /* Old path must exist. */
+ ut_ad(os_file_status(oldpath, &exists, &type));
+ ut_ad(exists);
+#endif /* UNIV_DEBUG */
+
#ifdef __WIN__
BOOL ret;
@@ -1837,7 +1971,7 @@ os_file_rename_func(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -1877,7 +2011,7 @@ os_file_close_func(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
#ifdef UNIV_HOTBACKUP
@@ -1913,7 +2047,7 @@ os_file_close_no_error_handling(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
#endif /* UNIV_HOTBACKUP */
@@ -1942,7 +2076,7 @@ os_file_get_size(
return(offset);
#else
return((os_offset_t) lseek(file, 0, SEEK_END));
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -2175,10 +2309,7 @@ os_file_flush_func(
return(TRUE);
}
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: the OS said file flush did not succeed\n");
+ ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
os_file_handle_error(NULL, "flush");
@@ -2215,9 +2346,9 @@ os_file_pread(
offs = (off_t) offset;
if (sizeof(off_t) <= 4) {
- if (UNIV_UNLIKELY(offset != (os_offset_t) offs)) {
- fprintf(stderr,
- "InnoDB: Error: file read at offset > 4 GB\n");
+ if (offset != (os_offset_t) offs) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File read at offset > 4 GB");
}
}
@@ -2318,16 +2449,16 @@ os_file_pwrite(
off_t offs;
ut_ad(n);
+ ut_ad(!srv_read_only_mode);
/* If off_t is > 4 bytes in size, then we assume we can pass a
64-bit address */
offs = (off_t) offset;
if (sizeof(off_t) <= 4) {
- if (UNIV_UNLIKELY(offset != (os_offset_t) offs)) {
- fprintf(stderr,
- "InnoDB: Error: file write"
- " at offset > 4 GB\n");
+ if (offset != (os_offset_t) offs) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File write at offset > 4 GB.");
}
}
@@ -2402,7 +2533,7 @@ func_exit:
return(ret);
}
-#endif
+#endif /* !UNIV_HOTBACKUP */
}
#endif
@@ -2503,11 +2634,9 @@ try_again:
return(TRUE);
}
- fprintf(stderr,
- "InnoDB: Error: tried to read "ULINTPF" bytes at offset "
- UINT64PF"\n"
- "InnoDB: Was only able to read %ld.\n",
- n, offset, (lint) ret);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read "ULINTPF" bytes at offset " UINT64PF". "
+ "Was only able to read %ld.", n, offset, (lint) ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
@@ -2525,7 +2654,7 @@ error_handling:
(ulong) GetLastError()
#else
(ulong) errno
-#endif
+#endif /* __WIN__ */
);
fflush(stderr);
@@ -2683,6 +2812,8 @@ os_file_write_func(
os_offset_t offset, /*!< in: file offset where to write */
ulint n) /*!< in: number of bytes to write */
{
+ ut_ad(!srv_read_only_mode);
+
#ifdef __WIN__
BOOL ret;
DWORD len;
@@ -2842,8 +2973,8 @@ retry:
(ulint) errno);
if (strerror(errno) != NULL) {
fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n",
- (ulint) errno, strerror(errno));
+ "InnoDB: Error number %d means '%s'.\n",
+ errno, strerror(errno));
}
fprintf(stderr,
@@ -2866,15 +2997,15 @@ UNIV_INTERN
ibool
os_file_status(
/*===========*/
- const char* path, /*!< in: pathname of the file */
+ const char* path, /*!< in: pathname of the file */
ibool* exists, /*!< out: TRUE if file exists */
os_file_type_t* type) /*!< out: type of the file (if it exists) */
{
#ifdef __WIN__
int ret;
- struct _stat statinfo;
+ struct _stat64 statinfo;
- ret = _stat(path, &statinfo);
+ ret = _stat64(path, &statinfo);
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
*exists = FALSE;
@@ -2933,47 +3064,73 @@ os_file_status(
/*******************************************************************//**
This function returns information about the specified file
-@return TRUE if stat information found */
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-ibool
+dberr_t
os_file_get_status(
/*===============*/
const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info) /*!< information of a file in a
+ os_file_stat_t* stat_info, /*!< information of a file in a
directory */
+ bool check_rw_perm) /*!< in: for testing whether the
+ file can be opened in RW mode */
{
-#ifdef __WIN__
int ret;
- struct _stat statinfo;
- ret = _stat(path, &statinfo);
+#ifdef __WIN__
+ struct _stat64 statinfo;
+
+ ret = _stat64(path, &statinfo);
+
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
- return(FALSE);
+ return(DB_NOT_FOUND);
+
} else if (ret) {
/* file exists, but stat call failed */
os_file_handle_error_no_exit(path, "stat", FALSE);
- return(FALSE);
- }
- if (_S_IFDIR & statinfo.st_mode) {
+ return(DB_FAIL);
+
+ } else if (_S_IFDIR & statinfo.st_mode) {
stat_info->type = OS_FILE_TYPE_DIR;
} else if (_S_IFREG & statinfo.st_mode) {
+
+ DWORD access = GENERIC_READ;
+
+ if (!srv_read_only_mode) {
+ access |= GENERIC_WRITE;
+ }
+
stat_info->type = OS_FILE_TYPE_FILE;
+
+ /* Check if we can open it in read-only mode. */
+
+ if (check_rw_perm) {
+ HANDLE fh;
+
+ fh = CreateFile(
+ (LPCTSTR) path, // File to open
+ access,
+ 0, // No sharing
+ NULL, // Default security
+ OPEN_EXISTING, // Existing file only
+ FILE_ATTRIBUTE_NORMAL, // Normal file
+ NULL); // No attr. template
+
+ if (fh == INVALID_HANDLE_VALUE) {
+ stat_info->rw_perm = false;
+ } else {
+ stat_info->rw_perm = true;
+ CloseHandle(fh);
+ }
+ }
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
-
- stat_info->ctime = statinfo.st_ctime;
- stat_info->atime = statinfo.st_atime;
- stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
-
- return(TRUE);
#else
- int ret;
struct stat statinfo;
ret = stat(path, &statinfo);
@@ -2981,32 +3138,49 @@ os_file_get_status(
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
- return(FALSE);
+ return(DB_NOT_FOUND);
+
} else if (ret) {
/* file exists, but stat call failed */
os_file_handle_error_no_exit(path, "stat", FALSE);
- return(FALSE);
- }
+ return(DB_FAIL);
- if (S_ISDIR(statinfo.st_mode)) {
+ } else if (S_ISDIR(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_DIR;
} else if (S_ISLNK(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_LINK;
} else if (S_ISREG(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_FILE;
+
+ if (check_rw_perm) {
+ int fh;
+ int access;
+
+ access = !srv_read_only_mode ? O_RDWR : O_RDONLY;
+
+ fh = ::open(path, access, os_innodb_umask);
+
+ if (fh == -1) {
+ stat_info->rw_perm = false;
+ } else {
+ stat_info->rw_perm = true;
+ close(fh);
+ }
+ }
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
+#endif /* _WIN_ */
+
stat_info->ctime = statinfo.st_ctime;
stat_info->atime = statinfo.st_atime;
stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
+ stat_info->size = statinfo.st_size;
- return(TRUE);
-#endif
+ return(DB_SUCCESS);
}
/* path name separator character */
@@ -3017,6 +3191,153 @@ os_file_get_status(
#endif
/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+ const char* old_path, /*!< in: pathname */
+ const char* tablename) /*!< in: contains new base name */
+{
+ ulint dir_len;
+ char* last_slash;
+ char* base_name;
+ char* new_path;
+ ulint new_path_len;
+
+ /* Split the tablename into its database and table name components.
+ They are separated by a '/'. */
+ last_slash = strrchr((char*) tablename, '/');
+ base_name = last_slash ? last_slash + 1 : (char*) tablename;
+
+ /* Find the offset of the last slash. We will strip off the
+ old basename.ibd which starts after that slash. */
+ last_slash = strrchr((char*) old_path, OS_FILE_PATH_SEPARATOR);
+ dir_len = last_slash ? last_slash - old_path : strlen(old_path);
+
+ /* allocate a new path and move the old directory path to it. */
+ new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd";
+ new_path = static_cast<char*>(mem_alloc(new_path_len));
+ memcpy(new_path, old_path, dir_len);
+
+ ut_snprintf(new_path + dir_len,
+ new_path_len - dir_len,
+ "%c%s.ibd",
+ OS_FILE_PATH_SEPARATOR,
+ base_name);
+
+ return(new_path);
+}
+
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'. It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided. The filename is the tablename with the '.ibd'
+extension. All input and output strings are null-terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+ const char* data_dir_path, /*!< in: pathname */
+ const char* tablename, /*!< in: tablename */
+ const char* extention) /*!< in: file extention; ibd,cfg */
+{
+ ulint data_dir_len;
+ char* last_slash;
+ char* new_path;
+ ulint new_path_len;
+
+ ut_ad(extention && strlen(extention) == 3);
+
+ /* Find the offset of the last slash. We will strip off the
+ old basename or tablename which starts after that slash. */
+ last_slash = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ data_dir_len = last_slash ? last_slash - data_dir_path : strlen(data_dir_path);
+
+ /* allocate a new path and move the old directory path to it. */
+ new_path_len = data_dir_len + strlen(tablename)
+ + sizeof "/." + strlen(extention);
+ new_path = static_cast<char*>(mem_alloc(new_path_len));
+ memcpy(new_path, data_dir_path, data_dir_len);
+ ut_snprintf(new_path + data_dir_len,
+ new_path_len - data_dir_len,
+ "%c%s.%s",
+ OS_FILE_PATH_SEPARATOR,
+ tablename,
+ extention);
+
+ srv_normalize_path_for_win(new_path);
+
+ return(new_path);
+}
+
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+ char* data_dir_path) /*!< in/out: full path/data_dir_path */
+{
+ char* ptr;
+ char* tablename;
+ ulint tablename_len;
+
+ /* Replace the period before the extension with a null byte. */
+ ptr = strrchr((char*) data_dir_path, '.');
+ if (!ptr) {
+ return;
+ }
+ ptr[0] = '\0';
+
+ /* The tablename starts after the last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ if (!ptr) {
+ return;
+ }
+ ptr[0] = '\0';
+ tablename = ptr + 1;
+
+ /* The databasename starts after the next to last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ if (!ptr) {
+ return;
+ }
+ tablename_len = ut_strlen(tablename);
+
+ ut_memmove(++ptr, tablename, tablename_len);
+
+ ptr[tablename_len] = '\0';
+}
+
+/****************************************************************//**
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
@@ -3080,11 +3401,18 @@ os_file_create_subdirs_if_needed(
/*=============================*/
const char* path) /*!< in: path name */
{
- char* subdir;
- ibool success, subdir_exists;
- os_file_type_t type;
+ if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "read only mode set. Can't create subdirectories '%s'",
+ path);
+
+ return(FALSE);
+
+ }
+
+ char* subdir = os_file_dirname(path);
- subdir = os_file_dirname(path);
if (strlen(subdir) == 1
&& (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
/* subdir is root or cwd, nothing to do */
@@ -3094,15 +3422,21 @@ os_file_create_subdirs_if_needed(
}
/* Test if subdir exists */
- success = os_file_status(subdir, &subdir_exists, &type);
+ os_file_type_t type;
+ ibool subdir_exists;
+ ibool success = os_file_status(subdir, &subdir_exists, &type);
+
if (success && !subdir_exists) {
+
/* subdir does not exist, create it */
success = os_file_create_subdirs_if_needed(subdir);
+
if (!success) {
mem_free(subdir);
return(FALSE);
}
+
success = os_file_create_directory(subdir, FALSE);
}
@@ -3124,7 +3458,7 @@ os_aio_array_get_nth_slot(
{
ut_a(index < array->n_slots);
- return((array->slots) + index);
+ return(&array->slots[index]);
}
#if defined(LINUX_NATIVE_AIO)
@@ -3226,43 +3560,74 @@ os_aio_native_aio_supported(void)
/*=============================*/
{
int fd;
- byte* buf;
- byte* ptr;
- struct io_event io_event;
io_context_t io_ctx;
- struct iocb iocb;
- struct iocb* p_iocb;
- int err;
+ char name[1000];
if (!os_aio_linux_create_io_ctx(1, &io_ctx)) {
/* The platform does not support native aio. */
return(FALSE);
- }
+ } else if (!srv_read_only_mode) {
+ /* Now check if tmpdir supports native aio ops. */
+ fd = innobase_mysql_tmpfile();
- /* Now check if tmpdir supports native aio ops. */
- fd = innobase_mysql_tmpfile();
+ if (fd < 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Unable to create temp file to check "
+ "native AIO support.");
- if (fd < 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: unable to create "
- "temp file to check native AIO support.\n");
+ return(FALSE);
+ }
+ } else {
- return(FALSE);
+ srv_normalize_path_for_win(srv_log_group_home_dir);
+
+ ulint dirnamelen = strlen(srv_log_group_home_dir);
+ ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
+ memcpy(name, srv_log_group_home_dir, dirnamelen);
+
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ strcpy(name + dirnamelen, "ib_logfile0");
+
+ fd = ::open(name, O_RDONLY);
+
+ if (fd == -1) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Unable to open \"%s\" to check "
+ "native AIO read support.", name);
+
+ return(FALSE);
+ }
}
+ struct io_event io_event;
+
memset(&io_event, 0x0, sizeof(io_event));
- buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
- ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+ byte* buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
+ byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+
+ struct iocb iocb;
/* Suppress valgrind warning. */
memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
-
memset(&iocb, 0x0, sizeof(iocb));
- p_iocb = &iocb;
- io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
- err = io_submit(io_ctx, 1, &p_iocb);
+ struct iocb* p_iocb = &iocb;
+
+ if (!srv_read_only_mode) {
+ io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
+ } else {
+ ut_a(UNIV_PAGE_SIZE >= 512);
+ io_prep_pread(p_iocb, fd, ptr, 512, 0);
+ }
+
+ int err = io_submit(io_ctx, 1, &p_iocb);
+
if (err >= 1) {
/* Now collect the submitted IO request. */
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
@@ -3277,22 +3642,18 @@ os_aio_native_aio_supported(void)
case -EINVAL:
case -ENOSYS:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO is not"
- " supported on tmpdir.\n"
- "InnoDB: You can either move tmpdir to a"
- " file system that supports native AIO\n"
- "InnoDB: or you can set"
- " innodb_use_native_aio to FALSE to avoid"
- " this message.\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Linux Native AIO not supported. You can either "
+ "move %s to a file system that supports native "
+ "AIO or you can set innodb_use_native_aio to "
+ "FALSE to avoid this message.",
+ srv_read_only_mode ? name : "tmpdir");
/* fall through. */
default:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO check"
- " on tmpdir returned error[%d]\n", -err);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Linux Native AIO check on %s returned error[%d]",
+ srv_read_only_mode ? name : "tmpdir", -err);
}
return(FALSE);
@@ -3314,34 +3675,33 @@ os_aio_array_create(
ulint n_segments) /*!< in: number of segments in the aio array */
{
os_aio_array_t* array;
- ulint i;
- os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
OVERLAPPED* over;
#elif defined(LINUX_NATIVE_AIO)
struct io_event* io_event = NULL;
-#endif
+#endif /* WIN_ASYNC_IO */
ut_a(n > 0);
ut_a(n_segments > 0);
- array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(os_aio_array_t)));
+ array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(*array)));
+ memset(array, 0x0, sizeof(*array));
- array->mutex = os_mutex_create();
- array->not_full = os_event_create(NULL);
- array->is_empty = os_event_create(NULL);
+ array->mutex = os_mutex_create();
+ array->not_full = os_event_create();
+ array->is_empty = os_event_create();
os_event_set(array->is_empty);
- array->n_slots = n;
- array->n_segments = n_segments;
- array->n_reserved = 0;
- array->cur_seg = 0;
+ array->n_slots = n;
+ array->n_segments = n_segments;
array->slots = static_cast<os_aio_slot_t*>(
- ut_malloc(n * sizeof(os_aio_slot_t)));
+ ut_malloc(n * sizeof(*array->slots)));
+
+ memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots)));
#ifdef __WIN__
array->handles = static_cast<HANDLE*>(ut_malloc(n * sizeof(HANDLE)));
-#endif
+#endif /* __WIN__ */
#if defined(LINUX_NATIVE_AIO)
array->aio_ctx = NULL;
@@ -3359,16 +3719,27 @@ os_aio_array_create(
array->aio_ctx = static_cast<io_context**>(
ut_malloc(n_segments * sizeof(*array->aio_ctx)));
- for (i = 0; i < n_segments; ++i) {
+ for (ulint i = 0; i < n_segments; ++i) {
if (!os_aio_linux_create_io_ctx(n/n_segments,
&array->aio_ctx[i])) {
/* If something bad happened during aio setup
- we should call it a day and return right away.
- We don't care about any leaks because a failure
- to initialize the io subsystem means that the
- server (or atleast the innodb storage engine)
- is not going to startup. */
- return(NULL);
+ we disable linux native aio.
+ The disadvantage will be a small memory leak
+ at shutdown but that's ok compared to a crash
+ or a not working server.
+ This frequently happens when running the test suite
+ with many threads on a system with low fs.aio-max-nr!
+ */
+
+ fprintf(stderr,
+ " InnoDB: Warning: Linux Native AIO disabled "
+ "because os_aio_linux_create_io_ctx() "
+ "failed. To get rid of this warning you can "
+ "try increasing system "
+ "fs.aio-max-nr to 1048576 or larger or "
+ "setting innodb_use_native_aio = 0 in my.cnf\n");
+ srv_use_native_aio = FALSE;
+ goto skip_native_aio;
}
}
@@ -3381,7 +3752,9 @@ os_aio_array_create(
skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i);
slot->pos = i;
@@ -3389,18 +3762,17 @@ skip_native_aio:
#ifdef WIN_ASYNC_IO
slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
- over = &(slot->control);
+ over = &slot->control;
over->hEvent = slot->handle;
- *((array->handles) + i) = over->hEvent;
+ array->handles[i] = over->hEvent;
#elif defined(LINUX_NATIVE_AIO)
-
memset(&slot->control, 0x0, sizeof(slot->control));
slot->n_bytes = 0;
slot->ret = 0;
-#endif
+#endif /* WIN_ASYNC_IO */
}
return(array);
@@ -3412,7 +3784,7 @@ static
void
os_aio_array_free(
/*==============*/
- os_aio_array_t* array) /*!< in, own: array to free */
+ os_aio_array_t*& array) /*!< in, own: array to free */
{
#ifdef WIN_ASYNC_IO
ulint i;
@@ -3439,6 +3811,8 @@ os_aio_array_free(
ut_free(array->slots);
ut_free(array);
+
+ array = 0;
}
/***********************************************************************
@@ -3459,93 +3833,100 @@ os_aio_init(
ulint n_slots_sync) /*<! in: number of slots in the sync aio
array */
{
- ulint i;
- ulint n_segments = 2 + n_read_segs + n_write_segs;
-
- ut_ad(n_segments >= 4);
-
os_io_init_simple();
#if defined(LINUX_NATIVE_AIO)
/* Check if native aio is supported on this system and tmpfs */
- if (srv_use_native_aio
- && !os_aio_native_aio_supported()) {
+ if (srv_use_native_aio && !os_aio_native_aio_supported()) {
+
+ ib_logf(IB_LOG_LEVEL_WARN, "Linux Native AIO disabled.");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Linux Native AIO"
- " disabled.\n");
srv_use_native_aio = FALSE;
}
#endif /* LINUX_NATIVE_AIO */
- for (i = 0; i < n_segments; i++) {
- srv_set_io_thread_op_info(i, "not started yet");
- }
-
+ srv_reset_io_thread_op_info();
- /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+ os_aio_read_array = os_aio_array_create(
+ n_read_segs * n_per_seg, n_read_segs);
- os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
- if (os_aio_ibuf_array == NULL) {
- goto err_exit;
+ if (os_aio_read_array == NULL) {
+ return(FALSE);
}
- srv_io_thread_function[0] = "insert buffer thread";
+ ulint start = (srv_read_only_mode) ? 0 : 2;
+ ulint n_segs = n_read_segs + start;
- os_aio_log_array = os_aio_array_create(n_per_seg, 1);
- if (os_aio_log_array == NULL) {
- goto err_exit;
+ /* 0 is the ibuf segment and 1 is the insert buffer segment. */
+ for (ulint i = start; i < n_segs; ++i) {
+ ut_a(i < SRV_MAX_N_IO_THREADS);
+ srv_io_thread_function[i] = "read thread";
}
- srv_io_thread_function[1] = "log thread";
+ ulint n_segments = n_read_segs;
- os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
- n_read_segs);
- if (os_aio_read_array == NULL) {
- goto err_exit;
- }
+ if (!srv_read_only_mode) {
- for (i = 2; i < 2 + n_read_segs; i++) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "read thread";
- }
+ os_aio_log_array = os_aio_array_create(n_per_seg, 1);
- os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
- n_write_segs);
- if (os_aio_write_array == NULL) {
- goto err_exit;
- }
+ if (os_aio_log_array == NULL) {
+ return(FALSE);
+ }
- for (i = 2 + n_read_segs; i < n_segments; i++) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "write thread";
+ ++n_segments;
+
+ srv_io_thread_function[1] = "log thread";
+
+ os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+
+ if (os_aio_ibuf_array == NULL) {
+ return(FALSE);
+ }
+
+ ++n_segments;
+
+ srv_io_thread_function[0] = "insert buffer thread";
+
+ os_aio_write_array = os_aio_array_create(
+ n_write_segs * n_per_seg, n_write_segs);
+
+ if (os_aio_write_array == NULL) {
+ return(FALSE);
+ }
+
+ n_segments += n_write_segs;
+
+ for (ulint i = start + n_read_segs; i < n_segments; ++i) {
+ ut_a(i < SRV_MAX_N_IO_THREADS);
+ srv_io_thread_function[i] = "write thread";
+ }
+
+ ut_ad(n_segments >= 4);
+ } else {
+ ut_ad(n_segments > 0);
}
os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+
if (os_aio_sync_array == NULL) {
- goto err_exit;
+ return(FALSE);
}
-
os_aio_n_segments = n_segments;
os_aio_validate();
- os_aio_segment_wait_events = static_cast<os_event_struct_t**>(
- ut_malloc(n_segments * sizeof(void*)));
+ os_aio_segment_wait_events = static_cast<os_event_t*>(
+ ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
- for (i = 0; i < n_segments; i++) {
- os_aio_segment_wait_events[i] = os_event_create(NULL);
+ for (ulint i = 0; i < n_segments; ++i) {
+ os_aio_segment_wait_events[i] = os_event_create();
}
- os_last_printout = time(NULL);
+ os_last_printout = ut_time();
return(TRUE);
-err_exit:
- return(FALSE);
-
}
/***********************************************************************
@@ -3555,20 +3936,25 @@ void
os_aio_free(void)
/*=============*/
{
- ulint i;
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_free(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_free(os_aio_log_array);
+ }
+
+ if (os_aio_write_array != 0) {
+ os_aio_array_free(os_aio_write_array);
+ }
+
+ if (os_aio_sync_array != 0) {
+ os_aio_array_free(os_aio_sync_array);
+ }
- os_aio_array_free(os_aio_ibuf_array);
- os_aio_ibuf_array = NULL;
- os_aio_array_free(os_aio_log_array);
- os_aio_log_array = NULL;
os_aio_array_free(os_aio_read_array);
- os_aio_read_array = NULL;
- os_aio_array_free(os_aio_write_array);
- os_aio_write_array = NULL;
- os_aio_array_free(os_aio_sync_array);
- os_aio_sync_array = NULL;
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_event_free(os_aio_segment_wait_events[i]);
}
@@ -3604,14 +3990,20 @@ void
os_aio_wake_all_threads_at_shutdown(void)
/*=====================================*/
{
- ulint i;
-
#ifdef WIN_ASYNC_IO
/* This code wakes up all ai/o threads in Windows native aio */
os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
+ if (os_aio_write_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
+ }
+
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
+ }
#elif defined(LINUX_NATIVE_AIO)
@@ -3623,12 +4015,14 @@ os_aio_wake_all_threads_at_shutdown(void)
if (srv_use_native_aio) {
return;
}
+
/* Fall through to simulated AIO handler wakeup if we are
not using native AIO. */
-#endif
+#endif /* !WIN_ASYNC_AIO */
+
/* This loop wakes up all simulated ai/o threads */
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_event_set(os_aio_segment_wait_events[i]);
}
@@ -3642,6 +4036,7 @@ void
os_aio_wait_until_no_pending_writes(void)
/*=====================================*/
{
+ ut_ad(!srv_read_only_mode);
os_event_wait(os_aio_write_array->is_empty);
}
@@ -3660,10 +4055,14 @@ os_aio_get_segment_no_from_slot(
ulint seg_len;
if (array == os_aio_ibuf_array) {
- segment = 0;
+ ut_ad(!srv_read_only_mode);
+
+ segment = IO_IBUF_SEGMENT;
} else if (array == os_aio_log_array) {
- segment = 1;
+ ut_ad(!srv_read_only_mode);
+
+ segment = IO_LOG_SEGMENT;
} else if (array == os_aio_read_array) {
seg_len = os_aio_read_array->n_slots
@@ -3671,7 +4070,9 @@ os_aio_get_segment_no_from_slot(
segment = 2 + slot->pos / seg_len;
} else {
+ ut_ad(!srv_read_only_mode);
ut_a(array == os_aio_write_array);
+
seg_len = os_aio_write_array->n_slots
/ os_aio_write_array->n_segments;
@@ -3692,15 +4093,19 @@ os_aio_get_array_and_local_segment(
os_aio_array_t** array, /*!< out: aio wait array */
ulint global_segment)/*!< in: global segment number */
{
- ulint segment;
+ ulint segment;
ut_a(global_segment < os_aio_n_segments);
- if (global_segment == 0) {
+ if (srv_read_only_mode) {
+ *array = os_aio_read_array;
+
+ return(global_segment);
+ } else if (global_segment == IO_IBUF_SEGMENT) {
*array = os_aio_ibuf_array;
segment = 0;
- } else if (global_segment == 1) {
+ } else if (global_segment == IO_LOG_SEGMENT) {
*array = os_aio_log_array;
segment = 0;
@@ -3748,7 +4153,7 @@ os_aio_array_reserve_slot(
struct iocb* iocb;
off_t aio_offset;
-#endif
+#endif /* WIN_ASYNC_IO */
ulint i;
ulint counter;
ulint slots_per_seg;
@@ -3756,7 +4161,7 @@ os_aio_array_reserve_slot(
#ifdef WIN_ASYNC_IO
ut_a((len & 0xFFFFFFFFUL) == len);
-#endif
+#endif /* WIN_ASYNC_IO */
/* No need of a mutex. Only reading constant fields */
slots_per_seg = array->n_slots / array->n_segments;
@@ -3789,9 +4194,11 @@ loop:
local segment and do a full scan of the array. We are
guaranteed to find a slot in full scan. */
for (i = local_seg * slots_per_seg, counter = 0;
- counter < array->n_slots; i++, counter++) {
+ counter < array->n_slots;
+ i++, counter++) {
i %= array->n_slots;
+
slot = os_aio_array_get_nth_slot(array, i);
if (slot->reserved == FALSE) {
@@ -3815,7 +4222,7 @@ found:
}
slot->reserved = TRUE;
- slot->reservation_time = time(NULL);
+ slot->reservation_time = ut_time();
slot->message1 = message1;
slot->message2 = message2;
slot->file = file;
@@ -3827,7 +4234,7 @@ found:
slot->io_already_done = FALSE;
#ifdef WIN_ASYNC_IO
- control = &(slot->control);
+ control = &slot->control;
control->Offset = (DWORD) offset & 0xFFFFFFFF;
control->OffsetHigh = (DWORD) (offset >> 32);
ResetEvent(slot->handle);
@@ -3858,7 +4265,6 @@ found:
iocb->data = (void*) slot;
slot->n_bytes = 0;
slot->ret = 0;
- /*fprintf(stderr, "Filled up Linux native iocb.\n");*/
skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
@@ -3876,9 +4282,6 @@ os_aio_array_free_slot(
os_aio_array_t* array, /*!< in: aio array */
os_aio_slot_t* slot) /*!< in: pointer to slot */
{
- ut_ad(array);
- ut_ad(slot);
-
os_mutex_enter(array->mutex);
ut_ad(slot->reserved);
@@ -3927,36 +4330,42 @@ os_aio_simulated_wake_handler_thread(
arrays */
{
os_aio_array_t* array;
- os_aio_slot_t* slot;
ulint segment;
- ulint n;
- ulint i;
ut_ad(!srv_use_native_aio);
segment = os_aio_get_array_and_local_segment(&array, global_segment);
- n = array->n_slots / array->n_segments;
+ ulint n = array->n_slots / array->n_segments;
+
+ segment *= n;
/* Look through n slots after the segment * n'th slot */
os_mutex_enter(array->mutex);
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
+ for (ulint i = 0; i < n; ++i) {
+ const os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(array, segment + i);
if (slot->reserved) {
+
/* Found an i/o request */
- break;
+ os_mutex_exit(array->mutex);
+
+ os_event_t event;
+
+ event = os_aio_segment_wait_events[global_segment];
+
+ os_event_set(event);
+
+ return;
}
}
os_mutex_exit(array->mutex);
-
- if (i < n) {
- os_event_set(os_aio_segment_wait_events[global_segment]);
- }
}
/**********************************************************************//**
@@ -3966,8 +4375,6 @@ void
os_aio_simulated_wake_handler_threads(void)
/*=======================================*/
{
- ulint i;
-
if (srv_use_native_aio) {
/* We do not use simulated aio: do nothing */
@@ -3976,7 +4383,7 @@ os_aio_simulated_wake_handler_threads(void)
os_aio_recommend_sleep_for_read_threads = FALSE;
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_aio_simulated_wake_handler_thread(i);
}
}
@@ -3998,7 +4405,6 @@ background threads too eagerly to allow for coalescing during
readahead requests. */
#ifdef __WIN__
os_aio_array_t* array;
- ulint g;
if (srv_use_native_aio) {
/* We do not use simulated aio: do nothing */
@@ -4008,12 +4414,12 @@ readahead requests. */
os_aio_recommend_sleep_for_read_threads = TRUE;
- for (g = 0; g < os_aio_n_segments; g++) {
- os_aio_get_array_and_local_segment(&array, g);
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
+ os_aio_get_array_and_local_segment(&array, i);
if (array == os_aio_read_array) {
- os_event_reset(os_aio_segment_wait_events[g]);
+ os_event_reset(os_aio_segment_wait_events[i]);
}
}
#endif /* __WIN__ */
@@ -4111,11 +4517,10 @@ os_aio_func(
ibool retval;
BOOL ret = TRUE;
DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
+ struct fil_node_t* dummy_mess1;
void* dummy_mess2;
ulint dummy_type;
#endif /* WIN_ASYNC_IO */
- ibool retry;
ulint wake_later;
ut_ad(file);
@@ -4153,6 +4558,7 @@ os_aio_func(
return(os_file_read_func(file, buf, offset, n));
}
+ ut_ad(!srv_read_only_mode);
ut_a(type == OS_FILE_WRITE);
return(os_file_write_func(name, file, buf, offset, n));
@@ -4161,9 +4567,12 @@ os_aio_func(
try_again:
switch (mode) {
case OS_AIO_NORMAL:
- array = (type == OS_FILE_READ)
- ? os_aio_read_array
- : os_aio_write_array;
+ if (type == OS_FILE_READ) {
+ array = os_aio_read_array;
+ } else {
+ ut_ad(!srv_read_only_mode);
+ array = os_aio_write_array;
+ }
break;
case OS_AIO_IBUF:
ut_ad(type == OS_FILE_READ);
@@ -4172,14 +4581,21 @@ try_again:
wake_later = FALSE;
- array = os_aio_ibuf_array;
+ if (srv_read_only_mode) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_ibuf_array;
+ }
break;
case OS_AIO_LOG:
- array = os_aio_log_array;
+ if (srv_read_only_mode) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_log_array;
+ }
break;
case OS_AIO_SYNC:
array = os_aio_sync_array;
-
#if defined(LINUX_NATIVE_AIO)
/* In Linux native AIO we don't use sync IO array. */
ut_a(!srv_use_native_aio);
@@ -4204,7 +4620,7 @@ try_again:
if (!os_aio_linux_dispatch(array, slot)) {
goto err_exit;
}
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
if (!wake_later) {
os_aio_simulated_wake_handler_thread(
@@ -4213,6 +4629,7 @@ try_again:
}
}
} else if (type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
if (srv_use_native_aio) {
os_n_file_writes++;
#ifdef WIN_ASYNC_IO
@@ -4223,7 +4640,7 @@ try_again:
if (!os_aio_linux_dispatch(array, slot)) {
goto err_exit;
}
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
if (!wake_later) {
os_aio_simulated_wake_handler_thread(
@@ -4247,11 +4664,10 @@ try_again:
we must use the same wait mechanism as for
async i/o */
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
+ retval = os_aio_windows_handle(
+ ULINT_UNDEFINED, slot->pos,
+ &dummy_mess1, &dummy_mess2,
+ &dummy_type);
return(retval);
}
@@ -4270,10 +4686,8 @@ err_exit:
#endif /* LINUX_NATIVE_AIO || WIN_ASYNC_IO */
os_aio_array_free_slot(array, slot);
- retry = os_file_handle_error(name,
- type == OS_FILE_READ
- ? "aio read" : "aio write");
- if (retry) {
+ if (os_file_handle_error(
+ name,type == OS_FILE_READ ? "aio read" : "aio write")) {
goto try_again;
}
@@ -4323,8 +4737,8 @@ os_aio_windows_handle(
BOOL retry = FALSE;
if (segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
segment = 0;
+ array = os_aio_sync_array;
} else {
segment = os_aio_get_array_and_local_segment(&array, segment);
}
@@ -4338,16 +4752,21 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) {
+
WaitForSingleObject(
os_aio_array_get_nth_slot(array, pos)->handle,
INFINITE);
+
i = pos;
+
} else {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
- i = WaitForMultipleObjects((DWORD) n,
- array->handles + segment * n,
- FALSE,
- INFINITE);
+ if (orig_seg != ULINT_UNDEFINED) {
+ srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
+ }
+
+ i = WaitForMultipleObjects(
+ (DWORD) n, array->handles + segment * n,
+ FALSE, INFINITE);
}
os_mutex_enter(array->mutex);
@@ -4367,8 +4786,8 @@ os_aio_windows_handle(
ut_a(slot->reserved);
if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg,
- "get windows aio return value");
+ srv_set_io_thread_op_info(
+ orig_seg, "get windows aio return value");
}
ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
@@ -4671,7 +5090,7 @@ found:
*type = slot->type;
- if ((slot->ret == 0) && (slot->n_bytes == (long) slot->len)) {
+ if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
ret = TRUE;
} else {
@@ -4720,8 +5139,6 @@ os_aio_simulated_handle(
{
os_aio_array_t* array;
ulint segment;
- os_aio_slot_t* slot;
- os_aio_slot_t* slot2;
os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
ulint n_consecutive;
ulint total_len;
@@ -4734,7 +5151,7 @@ os_aio_simulated_handle(
ibool ret;
ibool any_reserved;
ulint n;
- ulint i;
+ os_aio_slot_t* aio_slot;
/* Fix compiler warning */
*consecutive_ios = NULL;
@@ -4772,7 +5189,9 @@ restart:
os_mutex_enter(array->mutex);
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (!slot->reserved) {
@@ -4786,8 +5205,8 @@ restart:
(ulong) i);
}
+ aio_slot = slot;
ret = TRUE;
-
goto slot_io_done;
} else {
any_reserved = TRUE;
@@ -4797,9 +5216,7 @@ restart:
/* There is no completed request.
If there is no pending request at all,
and the system is being shut down, exit. */
- if (UNIV_UNLIKELY
- (!any_reserved
- && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
+ if (!any_reserved && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_mutex_exit(array->mutex);
*message1 = NULL;
*message2 = NULL;
@@ -4815,12 +5232,15 @@ restart:
biggest_age = 0;
lowest_offset = IB_UINT64_MAX;
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (slot->reserved) {
- age = (ulint) difftime(time(NULL),
- slot->reservation_time);
+
+ age = (ulint) difftime(
+ ut_time(), slot->reservation_time);
if ((age >= 2 && age > biggest_age)
|| (age >= 2 && age == biggest_age
@@ -4844,9 +5264,11 @@ restart:
lowest_offset = IB_UINT64_MAX;
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array,
- i + segment * n);
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(
+ array, i + segment * n);
if (slot->reserved && slot->offset < lowest_offset) {
@@ -4872,25 +5294,28 @@ restart:
ut_ad(n_consecutive != 0);
ut_ad(consecutive_ios[0] != NULL);
- slot = consecutive_ios[0];
+ aio_slot = consecutive_ios[0];
/* Check if there are several consecutive blocks to read or write */
consecutive_loop:
- for (i = 0; i < n; i++) {
- slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
- if (slot2->reserved && slot2 != slot
- && slot2->offset == slot->offset + slot->len
- && slot2->type == slot->type
- && slot2->file == slot->file) {
+ if (slot->reserved
+ && slot != aio_slot
+ && slot->offset == slot->offset + aio_slot->len
+ && slot->type == aio_slot->type
+ && slot->file == aio_slot->file) {
/* Found a consecutive i/o request */
- consecutive_ios[n_consecutive] = slot2;
+ consecutive_ios[n_consecutive] = slot;
n_consecutive++;
- slot = slot2;
+ aio_slot = slot;
if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
@@ -4908,15 +5333,15 @@ consecutive_loop:
i/o */
total_len = 0;
- slot = consecutive_ios[0];
+ aio_slot = consecutive_ios[0];
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
total_len += consecutive_ios[i]->len;
}
if (n_consecutive == 1) {
/* We can use the buffer of the i/o request */
- combined_buf = slot->buf;
+ combined_buf = aio_slot->buf;
combined_buf2 = NULL;
} else {
combined_buf2 = static_cast<byte*>(
@@ -4934,50 +5359,41 @@ consecutive_loop:
os_mutex_exit(array->mutex);
- if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+ if (aio_slot->type == OS_FILE_WRITE && n_consecutive > 1) {
/* Copy the buffers to the combined buffer */
offs = 0;
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
consecutive_ios[i]->len);
+
offs += consecutive_ios[i]->len;
}
}
srv_set_io_thread_op_info(global_segment, "doing file i/o");
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: doing i/o of type %lu at offset " UINT64PF
- ", length %lu\n",
- (ulong) slot->type, slot->offset, (ulong) total_len);
- }
-
/* Do the i/o with ordinary, synchronous i/o functions: */
- if (slot->type == OS_FILE_WRITE) {
- ret = os_file_write(slot->name, slot->file, combined_buf,
- slot->offset, total_len);
+ if (aio_slot->type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
+ ret = os_file_write(
+ aio_slot->name, aio_slot->file, combined_buf,
+ aio_slot->offset, total_len);
} else {
- ret = os_file_read(slot->file, combined_buf,
- slot->offset, total_len);
+ ret = os_file_read(
+ aio_slot->file, combined_buf,
+ aio_slot->offset, total_len);
}
ut_a(ret);
srv_set_io_thread_op_info(global_segment, "file i/o done");
-#if 0
- fprintf(stderr,
- "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
- n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
-#endif
-
- if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+ if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) {
/* Copy the combined buffer to individual buffers */
offs = 0;
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
consecutive_ios[i]->len);
@@ -4993,7 +5409,7 @@ consecutive_loop:
/* Mark the i/os done in slots */
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
consecutive_ios[i]->io_already_done = TRUE;
}
@@ -5003,16 +5419,16 @@ consecutive_loop:
slot_io_done:
- ut_a(slot->reserved);
+ ut_a(aio_slot->reserved);
- *message1 = slot->message1;
- *message2 = slot->message2;
+ *message1 = aio_slot->message1;
+ *message2 = aio_slot->message2;
- *type = slot->type;
+ *type = aio_slot->type;
os_mutex_exit(array->mutex);
- os_aio_array_free_slot(array, slot);
+ os_aio_array_free_slot(array, aio_slot);
return(ret);
@@ -5031,30 +5447,20 @@ recommended_sleep:
os_event_wait(os_aio_segment_wait_events[global_segment]);
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o handler thread for i/o"
- " segment %lu wakes up\n",
- (ulong) global_segment);
- }
-
goto restart;
}
/**********************************************************************//**
Validates the consistency of an aio array.
-@return TRUE if ok */
+@return true if ok */
static
-ibool
+bool
os_aio_array_validate(
/*==================*/
os_aio_array_t* array) /*!< in: aio wait array */
{
- os_aio_slot_t* slot;
- ulint n_reserved = 0;
ulint i;
-
- ut_a(array);
+ ulint n_reserved = 0;
os_mutex_enter(array->mutex);
@@ -5062,6 +5468,8 @@ os_aio_array_validate(
ut_a(array->n_segments > 0);
for (i = 0; i < array->n_slots; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i);
if (slot->reserved) {
@@ -5074,7 +5482,7 @@ os_aio_array_validate(
os_mutex_exit(array->mutex);
- return(TRUE);
+ return(true);
}
/**********************************************************************//**
@@ -5086,10 +5494,22 @@ os_aio_validate(void)
/*=================*/
{
os_aio_array_validate(os_aio_read_array);
- os_aio_array_validate(os_aio_write_array);
- os_aio_array_validate(os_aio_ibuf_array);
- os_aio_array_validate(os_aio_log_array);
- os_aio_array_validate(os_aio_sync_array);
+
+ if (os_aio_write_array != 0) {
+ os_aio_array_validate(os_aio_write_array);
+ }
+
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_validate(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_validate(os_aio_log_array);
+ }
+
+ if (os_aio_sync_array != 0) {
+ os_aio_array_validate(os_aio_sync_array);
+ }
return(TRUE);
}
@@ -5129,65 +5549,36 @@ os_aio_print_segment_info(
}
/**********************************************************************//**
-Prints info of the aio arrays. */
+Prints info about the aio array. */
UNIV_INTERN
void
-os_aio_print(
-/*=========*/
- FILE* file) /*!< in: file where to print */
+os_aio_print_array(
+/*==============*/
+ FILE* file, /*!< in: file where to print */
+ os_aio_array_t* array) /*!< in: aio array to print */
{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n_reserved;
- ulint n_res_seg[SRV_MAX_N_IO_THREADS];
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
- ulint i;
-
- for (i = 0; i < srv_n_file_io_threads; i++) {
- fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
- srv_io_thread_op_info[i],
- srv_io_thread_function[i]);
-
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set) {
- fprintf(file, " ev set");
- }
-#endif
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- array = os_aio_read_array;
-loop:
- ut_a(array);
+ ulint n_reserved = 0;
+ ulint n_res_seg[SRV_MAX_N_IO_THREADS];
os_mutex_enter(array->mutex);
ut_a(array->n_slots > 0);
ut_a(array->n_segments > 0);
- n_reserved = 0;
-
memset(n_res_seg, 0x0, sizeof(n_res_seg));
- for (i = 0; i < array->n_slots; i++) {
- ulint seg_no;
+ for (ulint i = 0; i < array->n_slots; ++i) {
+ os_aio_slot_t* slot;
+ ulint seg_no;
slot = os_aio_array_get_nth_slot(array, i);
seg_no = (i * array->n_segments) / array->n_slots;
+
if (slot->reserved) {
- n_reserved++;
- n_res_seg[seg_no]++;
-#if 0
- fprintf(stderr, "Reserved slot, messages %p %p\n",
- (void*) slot->message1,
- (void*) slot->message2);
-#endif
+ ++n_reserved;
+ ++n_res_seg[seg_no];
+
ut_a(slot->len > 0);
}
}
@@ -5199,38 +5590,61 @@ loop:
os_aio_print_segment_info(file, n_res_seg, array);
os_mutex_exit(array->mutex);
+}
- if (array == os_aio_read_array) {
- fputs(", aio writes:", file);
+/**********************************************************************//**
+Prints info of the aio arrays. */
+UNIV_INTERN
+void
+os_aio_print(
+/*=========*/
+ FILE* file) /*!< in: file where to print */
+{
+ time_t current_time;
+ double time_elapsed;
+ double avg_bytes_read;
- array = os_aio_write_array;
+ for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
+ fprintf(file, "I/O thread %lu state: %s (%s)",
+ (ulong) i,
+ srv_io_thread_op_info[i],
+ srv_io_thread_function[i]);
- goto loop;
+#ifndef __WIN__
+ if (os_aio_segment_wait_events[i]->is_set) {
+ fprintf(file, " ev set");
+ }
+#endif /* __WIN__ */
+
+ fprintf(file, "\n");
}
- if (array == os_aio_write_array) {
- fputs(",\n ibuf aio reads:", file);
- array = os_aio_ibuf_array;
+ fputs("Pending normal aio reads:", file);
- goto loop;
+ os_aio_print_array(file, os_aio_read_array);
+
+ if (os_aio_write_array != 0) {
+ fputs(", aio writes:", file);
+ os_aio_print_array(file, os_aio_write_array);
}
- if (array == os_aio_ibuf_array) {
- fputs(", log i/o's:", file);
- array = os_aio_log_array;
+ if (os_aio_ibuf_array != 0) {
+ fputs(",\n ibuf aio reads:", file);
+ os_aio_print_array(file, os_aio_ibuf_array);
+ }
- goto loop;
+ if (os_aio_log_array != 0) {
+ fputs(", log i/o's:", file);
+ os_aio_print_array(file, os_aio_log_array);
}
- if (array == os_aio_log_array) {
+ if (os_aio_sync_array != 0) {
fputs(", sync i/o's:", file);
- array = os_aio_sync_array;
-
- goto loop;
+ os_aio_print_array(file, os_aio_sync_array);
}
putc('\n', file);
- current_time = time(NULL);
+ current_time = ut_time();
time_elapsed = 0.001 + difftime(current_time, os_last_printout);
fprintf(file,
@@ -5238,7 +5652,8 @@ loop:
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
(ulong) fil_n_pending_log_flushes,
(ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
+ (ulong) os_n_file_reads,
+ (ulong) os_n_file_writes,
(ulong) os_n_fsyncs);
if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
@@ -5310,21 +5725,29 @@ os_aio_all_slots_free(void)
os_mutex_exit(array->mutex);
- array = os_aio_write_array;
+ if (!srv_read_only_mode) {
+ ut_a(os_aio_write_array == 0);
- os_mutex_enter(array->mutex);
+ array = os_aio_write_array;
- n_res += array->n_reserved;
+ os_mutex_enter(array->mutex);
- os_mutex_exit(array->mutex);
+ n_res += array->n_reserved;
- array = os_aio_ibuf_array;
+ os_mutex_exit(array->mutex);
- os_mutex_enter(array->mutex);
+ ut_a(os_aio_ibuf_array == 0);
- n_res += array->n_reserved;
+ array = os_aio_ibuf_array;
- os_mutex_exit(array->mutex);
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+ }
+
+ ut_a(os_aio_log_array == 0);
array = os_aio_log_array;
diff --git a/storage/innobase/os/os0sync.cc b/storage/innobase/os/os0sync.cc
index c2e2e7e477f..392dbe0d7a7 100644
--- a/storage/innobase/os/os0sync.cc
+++ b/storage/innobase/os/os0sync.cc
@@ -38,7 +38,7 @@ Created 9/6/1995 Heikki Tuuri
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */
-struct os_mutex_struct{
+struct os_mutex_t{
os_event_t event; /*!< Used by sync0arr.cc for queing threads */
void* handle; /*!< OS handle to mutex */
ulint count; /*!< we use this counter to check
@@ -47,12 +47,12 @@ struct os_mutex_struct{
do not assume that the OS mutex
supports recursive locking, though
NT seems to do that */
- UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
+ UT_LIST_NODE_T(os_mutex_t) os_mutex_list;
/* list of all 'slow' OS mutexes created */
};
/** Mutex protecting counts and the lists of OS mutexes and events */
-UNIV_INTERN os_mutex_t os_sync_mutex;
+UNIV_INTERN os_ib_mutex_t os_sync_mutex;
/** TRUE if os_sync_mutex has been initialized */
static ibool os_sync_mutex_inited = FALSE;
/** TRUE when os_sync_free() is being executed */
@@ -63,10 +63,10 @@ os_thread_exit */
UNIV_INTERN ulint os_thread_count = 0;
/** The list of all events created */
-static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
+static UT_LIST_BASE_NODE_T(os_event) os_event_list;
/** The list of all OS 'slow' mutexes */
-static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
+static UT_LIST_BASE_NODE_T(os_mutex_t) os_mutex_list;
UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0;
@@ -329,7 +329,7 @@ os_sync_free(void)
/*==============*/
{
os_event_t event;
- os_mutex_t mutex;
+ os_ib_mutex_t mutex;
os_sync_free_called = TRUE;
event = UT_LIST_GET_FIRST(os_event_list);
@@ -365,10 +365,8 @@ must be reset explicitly by calling sync_os_reset_event.
@return the event handle */
UNIV_INTERN
os_event_t
-os_event_create(
-/*============*/
- const char* name) /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_create(void)
+/*==================*/
{
os_event_t event;
@@ -377,10 +375,7 @@ os_event_create(
event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));
- event->handle = CreateEvent(NULL,
- TRUE,
- FALSE,
- (LPCTSTR) name);
+ event->handle = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event"
@@ -390,10 +385,7 @@ os_event_create(
} else /* Windows with condition variables */
#endif
{
- UT_NOT_USED(name);
-
- event = static_cast<os_event_struct_t*>(
- ut_malloc(sizeof(struct os_event_struct)));
+ event = static_cast<os_event_t>(ut_malloc(sizeof *event));
#ifndef PFS_SKIP_EVENT_MUTEX
os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
@@ -739,27 +731,26 @@ os_event_wait_time_low(
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
-os_mutex_t
+os_ib_mutex_t
os_mutex_create(void)
/*=================*/
{
os_fast_mutex_t* mutex;
- os_mutex_t mutex_str;
+ os_ib_mutex_t mutex_str;
mutex = static_cast<os_fast_mutex_t*>(
ut_malloc(sizeof(os_fast_mutex_t)));
os_fast_mutex_init(os_mutex_key, mutex);
- mutex_str = static_cast<os_mutex_t>(
- ut_malloc(sizeof(os_mutex_str_t)));
+ mutex_str = static_cast<os_ib_mutex_t>(ut_malloc(sizeof *mutex_str));
mutex_str->handle = mutex;
mutex_str->count = 0;
- mutex_str->event = os_event_create(NULL);
+ mutex_str->event = os_event_create();
if (UNIV_LIKELY(os_sync_mutex_inited)) {
/* When creating os_sync_mutex itself we cannot reserve it */
@@ -783,7 +774,7 @@ UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex) /*!< in: mutex to acquire */
+ os_ib_mutex_t mutex) /*!< in: mutex to acquire */
{
os_fast_mutex_lock(static_cast<os_fast_mutex_t*>(mutex->handle));
@@ -798,7 +789,7 @@ UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex) /*!< in: mutex to release */
+ os_ib_mutex_t mutex) /*!< in: mutex to release */
{
ut_a(mutex);
@@ -814,7 +805,7 @@ UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex) /*!< in: mutex to free */
+ os_ib_mutex_t mutex) /*!< in: mutex to free */
{
ut_a(mutex);
diff --git a/storage/innobase/os/os0thread.cc b/storage/innobase/os/os0thread.cc
index 48ee61e9402..9cc09a847b1 100644
--- a/storage/innobase/os/os0thread.cc
+++ b/storage/innobase/os/os0thread.cc
@@ -132,8 +132,10 @@ os_thread_create_func(
if (thread_id) {
*thread_id = win_thread_id;
}
-
- return(thread);
+ if (thread) {
+ CloseHandle(thread);
+ }
+ return((os_thread_t)win_thread_id);
#else
int ret;
os_thread_t pthread;
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index 9046338f377..f416d38cc35 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -973,6 +974,9 @@ page_cur_insert_rec_low(
page = page_align(current_rec);
ut_ad(dict_table_is_comp(index->table)
== (ibool) !!page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || recv_recovery_is_on() || mtr->inside_ibuf);
ut_ad(!page_rec_is_supremum(current_rec));
@@ -1007,8 +1011,8 @@ page_cur_insert_rec_low(
rec_offs_init(foffsets_);
- foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
+ foffsets = rec_get_offsets(
+ free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
if (rec_offs_size(foffsets) < rec_size) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -1167,14 +1171,27 @@ page_cur_insert_rec_zip_reorg(
buf_block_t* block, /*!< in: buffer block */
dict_index_t* index, /*!< in: record descriptor */
rec_t* rec, /*!< in: inserted record */
+ ulint rec_size,/*!< in: size of the inserted record */
page_t* page, /*!< in: uncompressed page */
page_zip_des_t* page_zip,/*!< in: compressed page */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
{
ulint pos;
+ /* Make a local copy as the values can change dynamically. */
+ bool log_compressed = page_log_compressed_pages;
+ ulint level = page_compression_level;
+
/* Recompress or reorganize and recompress the page. */
- if (page_zip_compress(page_zip, page, index, mtr)) {
+ if (page_zip_compress(page_zip, page, index, level,
+ log_compressed ? mtr : NULL)) {
+ if (!log_compressed) {
+ page_cur_insert_rec_write_log(
+ rec, rec_size, *current_rec, index, mtr);
+ page_zip_compress_write_log_no_data(
+ level, page, index, mtr);
+ }
+
return(rec);
}
@@ -1246,6 +1263,9 @@ page_cur_insert_rec_zip(
page = page_align(*current_rec);
ut_ad(dict_table_is_comp(index->table));
ut_ad(page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || mtr->inside_ibuf || recv_recovery_is_on());
ut_ad(!page_rec_is_supremum(*current_rec));
#ifdef UNIV_ZIP_DEBUG
@@ -1281,10 +1301,27 @@ page_cur_insert_rec_zip(
index, rec, offsets,
NULL);
- if (UNIV_LIKELY(insert_rec != NULL)) {
+ /* If recovery is on, this implies that the compression
+ of the page was successful during runtime. Had that not
+ been the case or had the redo logging of compressed
+ pages been enabled during runtime then we'd have seen
+ a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we
+ know that we don't need to reorganize the page. We,
+ however, do need to recompress the page. That will
+ happen when the next redo record is read which must
+ be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and it must
+ contain a valid compression level value.
+ This implies that during recovery from this point till
+ the next redo is applied the uncompressed and
+ compressed versions are not identical and
+ page_zip_validate will fail but that is OK because
+ we call page_zip_validate only after processing
+ all changes to a page under a single mtr during
+ recovery. */
+ if (insert_rec != NULL && !recv_recovery_is_on()) {
insert_rec = page_cur_insert_rec_zip_reorg(
current_rec, block, index, insert_rec,
- page, page_zip, mtr);
+ rec_size, page, page_zip, mtr);
#ifdef UNIV_DEBUG
if (insert_rec) {
rec_offs_make_valid(
@@ -1781,9 +1818,9 @@ UNIV_INLINE
void
page_cur_delete_rec_write_log(
/*==========================*/
- rec_t* rec, /*!< in: record to be deleted */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ rec_t* rec, /*!< in: record to be deleted */
+ const dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
@@ -1865,10 +1902,11 @@ UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
page_dir_slot_t* prev_slot;
@@ -1881,8 +1919,6 @@ page_cur_delete_rec(
ulint cur_n_owned;
rec_t* rec;
- ut_ad(cursor && mtr);
-
page = page_cur_get_page(cursor);
page_zip = page_cur_get_page_zip(cursor);
@@ -1897,17 +1933,23 @@ page_cur_delete_rec(
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || mtr->inside_ibuf || recv_recovery_is_on());
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
+ ut_ad(cur_slot_no > 0);
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
/* 0. Write the log record */
- page_cur_delete_rec_write_log(current_rec, index, mtr);
+ if (mtr != 0) {
+ page_cur_delete_rec_write_log(current_rec, index, mtr);
+ }
/* 1. Reset the last insert info in the page header and increment
the modify clock for the frame */
@@ -1915,9 +1957,13 @@ page_cur_delete_rec(
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
- frame modify clock */
+ frame modify clock only if there is an mini-transaction covering
+ the change. During IMPORT we allocate local blocks that are not
+ part of the buffer pool. */
- buf_block_modify_clock_inc(page_cur_get_block(cursor));
+ if (mtr != 0) {
+ buf_block_modify_clock_inc(page_cur_get_block(cursor));
+ }
/* 2. Find the next and the previous record. Note that the cursor is
left at the next record. */
@@ -1961,14 +2007,15 @@ page_cur_delete_rec(
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
- btr_blob_dbg_remove_rec(current_rec, index, offsets, "delete");
+ btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
+ offsets, "delete");
page_mem_free(page, page_zip, current_rec, index, offsets);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
slots. */
- if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
+ if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
page_dir_balance_slot(page, page_zip, cur_slot_no);
}
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index deef6935f08..6b7b8424856 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -512,7 +513,8 @@ page_create_zip(
page = page_create_low(block, TRUE);
mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
- if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
+ if (!page_zip_compress(page_zip, page, index,
+ page_compression_level, mtr)) {
/* The compression of a newly created page
should always succeed. */
ut_error;
@@ -658,7 +660,11 @@ page_copy_rec_list_end(
if (new_page_zip) {
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(new_page_zip, new_page, index, mtr)) {
+ if (!page_zip_compress(new_page_zip,
+ new_page,
+ index,
+ page_compression_level,
+ mtr)) {
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
ulint ret_pos
@@ -781,8 +787,9 @@ page_copy_rec_list_start(
DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
goto zip_reorganize;);
- if (UNIV_UNLIKELY
- (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+ if (!page_zip_compress(new_page_zip, new_page, index,
+ page_compression_level, mtr)) {
+
ulint ret_pos;
#ifndef DBUG_OFF
zip_reorganize:
@@ -793,8 +800,8 @@ zip_reorganize:
/* Before copying, "ret" was the predecessor
of the predefined supremum record. If it was
the predefined infimum record, then it would
- still be the infimum. Thus, the assertion
- ut_a(ret_pos > 0) would fail here. */
+ still be the infimum, and we would have
+ ret_pos == 0. */
if (UNIV_UNLIKELY
(!page_zip_reorganize(new_block, index, mtr))) {
@@ -1049,6 +1056,7 @@ page_delete_rec_list_end(
n_owned = rec_get_n_owned_new(rec2) - count;
slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(page, slot_index);
} else {
rec_t* rec2 = rec;
@@ -1064,6 +1072,7 @@ page_delete_rec_list_end(
n_owned = rec_get_n_owned_old(rec2) - count;
slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(page, slot_index);
}
@@ -1470,6 +1479,10 @@ page_rec_get_nth_const(
ulint n_owned;
const rec_t* rec;
+ if (nth == 0) {
+ return(page_get_infimum_rec(page));
+ }
+
ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
for (i = 0;; i++) {
@@ -2313,6 +2326,20 @@ page_validate(
}
}
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
+ && page_get_n_recs(page) > 0) {
+ trx_id_t max_trx_id = page_get_max_trx_id(page);
+ trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
+
+ if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "PAGE_MAX_TRX_ID out of bounds: "
+ TRX_ID_FMT ", " TRX_ID_FMT,
+ max_trx_id, sys_max_trx_id);
+ goto func_exit2;
+ }
+ }
+
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
/* The following buffer is used to check that the
@@ -2602,3 +2629,60 @@ page_find_rec_with_heap_no(
}
}
#endif /* !UNIV_HOTBACKUP */
+
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+The cursor is moved to the next record after the deleted one.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets)/*!< in: offsets for record */
+{
+ bool no_compress_needed;
+ buf_block_t* block = pcur->block;
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(page_is_leaf(page));
+
+ if (!rec_offs_any_extern(offsets)
+ && ((page_get_data_size(page) - rec_offs_size(offsets)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
+ && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
+ || (page_get_n_recs(page) < 2))) {
+
+ ulint root_page_no = dict_index_get_page(index);
+
+ /* The page fillfactor will drop below a predefined
+ minimum value, OR the level in the B-tree contains just
+ one page, OR the page will become empty: we recommend
+ compression if this is not the root page. */
+
+ no_compress_needed = page_get_page_no(page) == root_page_no;
+ } else {
+ no_compress_needed = true;
+ }
+
+ if (no_compress_needed) {
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ page_cur_delete_rec(pcur, index, offsets, 0);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(no_compress_needed);
+}
+
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 35a8f458fb2..dee37580002 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +24,9 @@ Compressed page interface
Created June 2005 by Marko Makela
*******************************************************/
+#include <map>
+using namespace std;
+
#define THIS_MODULE
#include "page0zip.h"
#ifdef UNIV_NONINL
@@ -54,9 +58,23 @@ Created June 2005 by Marko Makela
#ifndef UNIV_HOTBACKUP
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by index->id */
+UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index;
+/** Mutex protecting page_zip_stat_per_index */
+UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
#endif /* !UNIV_HOTBACKUP */
+/* Compression level to be used by zlib. Settable by user. */
+UNIV_INTERN ulint page_compression_level = 6;
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+UNIV_INTERN bool page_log_compressed_pages = true;
+
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */
@@ -386,7 +404,7 @@ page_zip_get_n_prev_extern(
compressed page */
const rec_t* rec, /*!< in: compact physical record
on a B-tree leaf page */
- dict_index_t* index) /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
{
const page_t* page = page_align(rec);
ulint n_ext = 0;
@@ -1181,6 +1199,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: commpression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
{
z_stream c_stream;
@@ -1194,7 +1213,6 @@ page_zip_compress(
const rec_t** recs; /*!< dense page directory, sorted by address */
mem_heap_t* heap;
ulint trx_id_col;
- ulint* offsets = NULL;
ulint n_blobs = 0;
byte* storage;/* storage of uncompressed columns */
#ifndef UNIV_HOTBACKUP
@@ -1203,6 +1221,10 @@ page_zip_compress(
#ifdef PAGE_ZIP_COMPRESS_DBG
FILE* logfile = NULL;
#endif
+ /* A local copy of srv_cmp_per_index_enabled to avoid reading that
+ variable multiple times in this function since it can be changed at
+ anytime. */
+ my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
ut_a(page_is_comp(page));
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
@@ -1265,6 +1287,11 @@ page_zip_compress(
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
page_zip_stat[page_zip->ssize - 1].compressed++;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed++;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
#endif /* !UNIV_HOTBACKUP */
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
@@ -1276,7 +1303,8 @@ page_zip_compress(
MONITOR_INC(MONITOR_PAGE_COMPRESS);
heap = mem_heap_create(page_zip_get_size(page_zip)
- + n_fields * (2 + sizeof *offsets)
+ + n_fields * (2 + sizeof(ulint))
+ + REC_OFFS_HEADER_SIZE
+ n_dense * ((sizeof *recs)
- PAGE_ZIP_DIR_SLOT_SIZE)
+ UNIV_PAGE_SIZE * 4
@@ -1295,7 +1323,7 @@ page_zip_compress(
/* Compress the data payload. */
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ err = deflateInit2(&c_stream, level,
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
@@ -1408,8 +1436,19 @@ err_exit:
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
+ if (page_is_leaf(page)) {
+ dict_index_zip_failure(index);
+ }
+
+ ullint time_diff = ut_time_us(NULL) - usec;
page_zip_stat[page_zip->ssize - 1].compressed_usec
- += ut_time_us(NULL) - usec;
+ += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_usec
+ += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
#endif /* !UNIV_HOTBACKUP */
return(FALSE);
}
@@ -1469,11 +1508,18 @@ err_exit:
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
- {
- page_zip_stat_t* zip_stat
- = &page_zip_stat[page_zip->ssize - 1];
- zip_stat->compressed_ok++;
- zip_stat->compressed_usec += ut_time_us(NULL) - usec;
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].compressed_ok++;
+ page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_ok++;
+ page_zip_stat_per_index[index->id].compressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+
+ if (page_is_leaf(page)) {
+ dict_index_zip_success(index);
}
#endif /* !UNIV_HOTBACKUP */
@@ -1518,6 +1564,7 @@ page_zip_fields_free(
{
if (index) {
dict_table_t* table = index->table;
+ os_fast_mutex_free(&index->zip_pad.mutex);
mem_heap_free(index->heap);
mutex_free(&(table->autoinc_mutex));
ut_free(table->name);
@@ -3075,11 +3122,17 @@ err_exit:
page_zip_fields_free(index);
mem_heap_free(heap);
#ifndef UNIV_HOTBACKUP
- {
- page_zip_stat_t* zip_stat
- = &page_zip_stat[page_zip->ssize - 1];
- zip_stat->decompressed++;
- zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].decompressed++;
+ page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
+
+ index_id_t index_id = btr_page_get_index_id(page);
+
+ if (srv_cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index_id].decompressed++;
+ page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
}
#endif /* !UNIV_HOTBACKUP */
@@ -3177,7 +3230,7 @@ page_zip_validate_low(
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
page_zip->data[] or page_zip could be viewed at temp_page[] or
temp_page_zip in a debugger when running valgrind --db-attach. */
- VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
+ (void) VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
# if UNIV_WORD_SIZE == 4
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
@@ -3186,8 +3239,8 @@ page_zip_validate_low(
pad bytes. */
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
# endif
- VALGRIND_GET_VBITS(page_zip->data, temp_page,
- page_zip_get_size(page_zip));
+ (void) VALGRIND_GET_VBITS(page_zip->data, temp_page,
+ page_zip_get_size(page_zip));
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
#endif /* UNIV_DEBUG_VALGRIND */
@@ -3295,7 +3348,7 @@ page_zip_validate_low(
"record list: 0x%02x!=0x%02x\n",
(unsigned) page_offset(rec),
(unsigned) page_offset(trec)));
- valid = FALSE;
+ valid = FALSE;
break;
}
@@ -4042,10 +4095,10 @@ static
void
page_zip_clear_rec(
/*===============*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: record to clear */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: record to clear */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
{
ulint heap_no;
page_t* page = page_align(rec);
@@ -4256,11 +4309,12 @@ UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: record to delete */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
{
byte* slot_rec;
byte* slot_free;
@@ -4576,7 +4630,8 @@ page_zip_reorganize(
/* Restore logging. */
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(page_zip, page, index, mtr)) {
+ if (!page_zip_compress(page_zip, page, index,
+ page_compression_level, mtr)) {
#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
diff --git a/storage/innobase/pars/lexyy.cc b/storage/innobase/pars/lexyy.cc
index 9de8ea51efd..48ab04e1eff 100644
--- a/storage/innobase/pars/lexyy.cc
+++ b/storage/innobase/pars/lexyy.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -55,7 +55,6 @@ typedef int flex_int32_t;
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
/* Limits of integral types. */
#ifndef INT8_MIN
@@ -86,6 +85,8 @@ typedef unsigned int flex_uint32_t;
#define UINT32_MAX (4294967295U)
#endif
+#endif /* ! C99 */
+
#endif /* ! FLEXINT_H */
#ifdef __cplusplus
@@ -142,7 +143,15 @@ typedef unsigned int flex_uint32_t;
/* Size of default input buffer. */
#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
#endif
/* The state buf must be large enough to hold one state per character in the main buffer.
@@ -276,7 +285,7 @@ static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
yy_size_t yyleng;
/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char*) 0;
+static char *yy_c_buf_p = (char *) 0;
static int yy_init = 0; /* whether we need to initialize */
static int yy_start = 0; /* start state number */
@@ -338,7 +347,7 @@ void yyfree (void * );
typedef unsigned char YY_CHAR;
-FILE *yyin = (FILE*) 0, *yyout = (FILE*) 0;
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
typedef int yy_state_type;
@@ -373,7 +382,7 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[424] =
+static yyconst flex_int16_t yy_accept[425] =
{ 0,
0, 0, 119, 119, 0, 0, 0, 0, 125, 123,
122, 122, 8, 123, 114, 5, 103, 109, 112, 110,
@@ -382,46 +391,46 @@ static yyconst flex_int16_t yy_accept[424] =
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
115, 116, 119, 120, 6, 7, 9, 10, 122, 4,
98, 118, 2, 1, 3, 99, 100, 102, 101, 0,
- 96, 96, 96, 96, 96, 96, 44, 96, 96, 96,
+ 96, 0, 96, 96, 96, 96, 96, 44, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 28, 17, 25, 96, 96, 96, 96,
+ 96, 96, 96, 96, 28, 17, 25, 96, 96, 96,
- 96, 96, 54, 63, 96, 14, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 119, 120, 120, 121, 6, 7,
- 9, 10, 2, 0, 97, 13, 45, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 27, 96, 96, 96,
- 41, 96, 96, 96, 96, 21, 96, 96, 96, 96,
- 96, 15, 96, 96, 96, 18, 96, 96, 96, 96,
- 96, 82, 96, 96, 96, 51, 96, 12, 96, 36,
+ 96, 96, 96, 54, 63, 96, 14, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
-
- 96, 0, 97, 96, 96, 96, 96, 20, 96, 24,
+ 96, 96, 96, 96, 96, 119, 120, 120, 121, 6,
+ 7, 9, 10, 2, 0, 97, 13, 45, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 46, 96, 96, 30, 96, 89, 96, 96, 39,
- 96, 96, 96, 96, 96, 48, 96, 94, 91, 32,
- 93, 96, 11, 66, 96, 96, 96, 42, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 29, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 87, 0,
- 96, 26, 96, 96, 96, 68, 96, 96, 96, 96,
- 37, 96, 96, 96, 96, 96, 96, 96, 31, 67,
- 23, 96, 59, 96, 77, 96, 96, 96, 43, 96,
-
- 96, 96, 96, 96, 96, 96, 96, 92, 96, 96,
- 56, 96, 96, 96, 96, 96, 96, 96, 40, 33,
- 0, 81, 95, 19, 96, 96, 85, 96, 76, 55,
- 96, 65, 96, 52, 96, 96, 96, 47, 96, 78,
- 96, 80, 96, 96, 34, 96, 96, 96, 35, 74,
- 96, 96, 96, 96, 60, 96, 50, 49, 96, 96,
- 96, 57, 53, 64, 96, 96, 96, 22, 96, 96,
- 75, 83, 96, 96, 79, 96, 70, 96, 96, 96,
- 96, 96, 38, 96, 90, 69, 96, 86, 96, 96,
- 96, 88, 96, 96, 61, 96, 16, 96, 72, 71,
-
- 96, 58, 96, 84, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 73, 96, 96, 96, 96, 96,
- 96, 62, 0
+ 96, 96, 96, 96, 96, 96, 96, 27, 96, 96,
+ 96, 41, 96, 96, 96, 96, 21, 96, 96, 96,
+ 96, 96, 15, 96, 96, 96, 18, 96, 96, 96,
+ 96, 96, 82, 96, 96, 96, 51, 96, 12, 96,
+ 36, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+
+ 96, 96, 0, 97, 96, 96, 96, 96, 20, 96,
+ 24, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 46, 96, 96, 30, 96, 89, 96, 96,
+ 39, 96, 96, 96, 96, 96, 48, 96, 94, 91,
+ 32, 93, 96, 11, 66, 96, 96, 96, 42, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 29,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 87,
+ 0, 96, 26, 96, 96, 96, 68, 96, 96, 96,
+ 96, 37, 96, 96, 96, 96, 96, 96, 96, 31,
+ 67, 23, 96, 59, 96, 77, 96, 96, 96, 43,
+
+ 96, 96, 96, 96, 96, 96, 96, 96, 92, 96,
+ 96, 56, 96, 96, 96, 96, 96, 96, 96, 40,
+ 33, 0, 81, 95, 19, 96, 96, 85, 96, 76,
+ 55, 96, 65, 96, 52, 96, 96, 96, 47, 96,
+ 78, 96, 80, 96, 96, 34, 96, 96, 96, 35,
+ 74, 96, 96, 96, 96, 60, 96, 50, 49, 96,
+ 96, 96, 57, 53, 64, 96, 96, 96, 22, 96,
+ 96, 75, 83, 96, 96, 79, 96, 70, 96, 96,
+ 96, 96, 96, 38, 96, 90, 69, 96, 86, 96,
+ 96, 96, 88, 96, 96, 61, 96, 16, 96, 72,
+
+ 71, 96, 58, 96, 84, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 73, 96, 96, 96, 96,
+ 96, 96, 62, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -432,14 +441,14 @@ static yyconst flex_int32_t yy_ec[256] =
1, 2, 1, 4, 5, 6, 7, 1, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
- 21, 22, 23, 1, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 1, 1, 1, 1, 50, 1, 33, 33, 33, 33,
-
- 33, 33, 33, 33, 33, 33, 33, 51, 33, 33,
- 33, 33, 52, 33, 53, 33, 33, 33, 33, 33,
- 33, 33, 54, 1, 55, 1, 1, 1, 1, 1,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 1, 1, 1, 1, 51, 1, 34, 34, 34, 34,
+
+ 34, 34, 34, 34, 34, 34, 34, 52, 34, 34,
+ 34, 34, 53, 34, 54, 34, 34, 34, 34, 34,
+ 34, 34, 55, 1, 56, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -456,328 +465,438 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[56] =
+static yyconst flex_int32_t yy_meta[57] =
{ 0,
1, 1, 1, 2, 3, 1, 1, 4, 1, 1,
5, 1, 1, 1, 1, 6, 7, 1, 1, 1,
- 8, 1, 1, 9, 9, 9, 9, 9, 9, 9,
+ 8, 1, 1, 6, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 1, 1
+ 9, 9, 9, 9, 1, 1
} ;
-static yyconst flex_int16_t yy_base[436] =
+static yyconst flex_int16_t yy_base[438] =
{ 0,
- 0, 0, 849, 848, 850, 849, 852, 851, 854, 861,
- 54, 56, 861, 0, 861, 861, 861, 861, 861, 861,
- 861, 861, 838, 841, 45, 830, 861, 42, 861, 829,
- 861, 45, 49, 54, 58, 56, 72, 833, 83, 86,
- 63, 67, 90, 53, 105, 107, 106, 120, 51, 101,
- 861, 861, 0, 55, 0, 840, 0, 843, 106, 0,
- 861, 861, 829, 61, 824, 861, 861, 861, 861, 839,
- 827, 88, 124, 130, 132, 125, 826, 129, 133, 136,
- 52, 138, 148, 140, 142, 145, 149, 152, 151, 159,
- 162, 169, 165, 825, 172, 824, 173, 170, 175, 179,
-
- 176, 177, 823, 822, 180, 182, 184, 200, 201, 195,
- 189, 202, 204, 207, 205, 210, 218, 220, 213, 215,
- 223, 230, 238, 217, 0, 240, 244, 861, 0, 829,
- 0, 832, 818, 781, 0, 817, 816, 233, 237, 243,
- 248, 251, 246, 252, 255, 257, 258, 262, 264, 263,
- 265, 267, 266, 269, 273, 270, 815, 274, 275, 287,
- 814, 290, 292, 291, 293, 294, 297, 300, 304, 298,
- 307, 313, 308, 309, 317, 813, 314, 315, 323, 318,
- 324, 328, 331, 332, 333, 812, 336, 811, 338, 810,
- 340, 339, 342, 344, 343, 341, 347, 346, 348, 349,
-
- 359, 773, 0, 356, 369, 370, 360, 808, 371, 807,
- 372, 375, 376, 378, 379, 380, 382, 383, 388, 393,
- 394, 806, 396, 397, 805, 398, 804, 399, 400, 803,
- 403, 404, 408, 413, 405, 802, 415, 801, 800, 799,
- 798, 406, 797, 796, 416, 417, 420, 795, 422, 418,
- 423, 425, 424, 426, 439, 429, 437, 440, 794, 446,
- 450, 453, 454, 455, 457, 458, 459, 460, 793, 757,
- 461, 791, 463, 464, 466, 790, 467, 468, 473, 474,
- 789, 475, 476, 477, 478, 480, 485, 486, 788, 787,
- 786, 489, 785, 491, 784, 498, 493, 494, 783, 499,
-
- 504, 509, 511, 513, 516, 514, 517, 782, 520, 519,
- 781, 521, 523, 527, 525, 528, 526, 529, 780, 779,
- 780, 776, 773, 530, 533, 535, 772, 534, 771, 770,
- 541, 769, 550, 760, 543, 548, 551, 753, 552, 736,
- 554, 730, 556, 557, 723, 558, 566, 563, 693, 692,
- 569, 572, 565, 578, 691, 574, 690, 689, 567, 585,
- 588, 688, 687, 685, 571, 589, 591, 683, 592, 593,
- 681, 680, 595, 596, 679, 597, 678, 599, 604, 602,
- 605, 608, 676, 606, 675, 674, 609, 673, 607, 610,
- 614, 670, 620, 623, 668, 628, 667, 630, 665, 664,
-
- 625, 663, 629, 112, 627, 626, 631, 632, 647, 633,
- 636, 637, 644, 650, 110, 652, 659, 657, 660, 661,
- 662, 57, 861, 710, 719, 728, 731, 734, 738, 747,
- 756, 765, 774, 781, 784
+ 0, 0, 293, 287, 284, 281, 272, 256, 254, 1357,
+ 55, 57, 1357, 0, 1357, 1357, 1357, 1357, 1357, 1357,
+ 1357, 1357, 238, 227, 46, 205, 1357, 43, 1357, 203,
+ 1357, 46, 50, 56, 52, 66, 64, 51, 81, 92,
+ 91, 94, 96, 111, 113, 116, 130, 134, 53, 143,
+ 1357, 1357, 0, 106, 0, 212, 0, 210, 141, 0,
+ 1357, 1357, 192, 56, 173, 1357, 1357, 1357, 1357, 168,
+ 140, 150, 152, 154, 155, 161, 167, 171, 177, 172,
+ 184, 174, 188, 189, 191, 194, 203, 212, 215, 217,
+ 219, 221, 226, 228, 231, 240, 233, 235, 246, 251,
+
+ 258, 253, 255, 256, 269, 271, 278, 272, 285, 283,
+ 287, 289, 296, 305, 298, 315, 319, 321, 322, 326,
+ 332, 333, 342, 339, 343, 0, 112, 173, 1357, 0,
+ 155, 0, 156, 132, 93, 0, 355, 357, 358, 360,
+ 364, 367, 374, 370, 379, 380, 389, 383, 390, 392,
+ 395, 408, 411, 409, 415, 418, 425, 427, 429, 436,
+ 431, 441, 446, 448, 450, 452, 453, 462, 471, 464,
+ 473, 474, 478, 485, 488, 490, 491, 494, 500, 501,
+ 504, 506, 507, 517, 518, 519, 520, 521, 522, 523,
+ 533, 536, 538, 543, 549, 554, 555, 561, 556, 566,
+
+ 567, 576, 60, 0, 573, 578, 580, 582, 583, 593,
+ 589, 596, 598, 603, 605, 607, 610, 617, 619, 621,
+ 622, 628, 633, 634, 635, 639, 640, 649, 650, 652,
+ 653, 655, 659, 664, 668, 669, 665, 671, 674, 678,
+ 681, 685, 687, 688, 692, 697, 698, 701, 703, 704,
+ 707, 708, 717, 713, 728, 730, 724, 740, 734, 745,
+ 746, 750, 751, 756, 757, 760, 761, 762, 771, 773,
+ 42, 778, 782, 783, 787, 789, 792, 794, 793, 804,
+ 805, 808, 809, 810, 819, 823, 826, 828, 829, 830,
+ 835, 840, 844, 846, 847, 856, 857, 858, 859, 860,
+
+ 863, 872, 873, 878, 879, 882, 885, 889, 894, 895,
+ 896, 898, 905, 910, 908, 912, 914, 915, 926, 930,
+ 931, 73, 932, 933, 935, 937, 942, 944, 946, 947,
+ 948, 949, 951, 958, 961, 965, 967, 972, 978, 979,
+ 981, 984, 983, 985, 994, 988, 999, 1000, 1001, 1004,
+ 1013, 1015, 1022, 1016, 1019, 1026, 1032, 1033, 1035, 1036,
+ 1038, 1039, 1048, 1049, 1050, 1051, 1053, 1054, 1060, 1063,
+ 1065, 1066, 1069, 1070, 1072, 1082, 1084, 1085, 1087, 1096,
+ 1097, 1098, 1099, 1101, 1113, 1114, 1115, 1116, 1117, 1118,
+ 1119, 1128, 1130, 1131, 1134, 1133, 1135, 1137, 1150, 1151,
+
+ 1153, 1155, 1157, 1162, 1160, 1167, 1172, 1173, 1174, 1176,
+ 1185, 1190, 1183, 1187, 1189, 1199, 1204, 1206, 1208, 1210,
+ 1215, 1220, 1222, 1357, 1269, 1278, 1287, 1290, 1293, 1297,
+ 1306, 1315, 1324, 1333, 1340, 1344, 1347
} ;
-static yyconst flex_int16_t yy_def[436] =
+static yyconst flex_int16_t yy_def[438] =
{ 0,
- 423, 1, 424, 424, 425, 425, 426, 426, 423, 423,
- 423, 423, 423, 427, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 428, 423, 423, 423, 423,
- 423, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 423, 423, 430, 431, 432, 423, 433, 423, 423, 427,
- 423, 423, 423, 423, 428, 423, 423, 423, 423, 434,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 430, 431, 431, 423, 432, 423,
- 433, 423, 423, 423, 435, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 423, 435, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 423,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 423, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 0, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423
+ 424, 1, 425, 425, 426, 426, 427, 427, 424, 424,
+ 424, 424, 424, 428, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 429, 424, 424, 424, 424,
+ 424, 430, 430, 430, 430, 430, 34, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 424, 424, 431, 432, 433, 424, 434, 424, 424, 428,
+ 424, 424, 424, 424, 429, 424, 424, 424, 424, 435,
+ 430, 436, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 431, 432, 432, 424, 433,
+ 424, 434, 424, 424, 424, 437, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 424, 437, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 424, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 424, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 0, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424
} ;
-static yyconst flex_int16_t yy_nxt[917] =
+static yyconst flex_int16_t yy_nxt[1414] =
{ 0,
10, 11, 12, 13, 10, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 38, 39, 38, 38, 40, 41, 42, 43, 44, 38,
- 45, 46, 47, 48, 49, 50, 38, 38, 38, 38,
- 38, 38, 38, 51, 52, 59, 59, 59, 59, 63,
- 70, 64, 67, 68, 70, 127, 70, 70, 70, 70,
- 128, 70, 70, 70, 122, 63, 74, 64, 70, 149,
- 75, 72, 70, 76, 78, 83, 73, 70, 79, 84,
- 86, 80, 87, 108, 81, 85, 77, 82, 70, 89,
-
- 100, 70, 88, 70, 101, 70, 90, 59, 59, 91,
- 102, 94, 92, 97, 136, 93, 70, 98, 103, 95,
- 70, 70, 70, 99, 96, 70, 104, 70, 105, 117,
- 106, 123, 109, 107, 112, 70, 118, 113, 124, 70,
- 70, 110, 111, 119, 70, 70, 114, 70, 70, 137,
- 115, 70, 143, 70, 116, 70, 120, 70, 121, 139,
- 70, 140, 142, 70, 70, 138, 70, 70, 141, 155,
- 144, 146, 147, 151, 70, 157, 145, 70, 150, 148,
- 70, 154, 152, 158, 70, 70, 156, 70, 70, 153,
- 70, 70, 70, 159, 70, 70, 160, 70, 164, 70,
-
- 169, 163, 161, 168, 70, 171, 162, 174, 175, 167,
- 70, 173, 170, 165, 166, 70, 70, 70, 172, 70,
- 70, 182, 70, 183, 179, 70, 176, 187, 70, 189,
- 70, 177, 70, 70, 184, 70, 185, 178, 70, 180,
- 190, 188, 192, 181, 186, 70, 195, 193, 70, 197,
- 423, 191, 70, 70, 127, 423, 196, 201, 70, 128,
- 204, 70, 194, 70, 198, 199, 70, 70, 205, 200,
- 70, 207, 70, 70, 206, 208, 209, 70, 70, 70,
- 70, 70, 70, 215, 70, 70, 210, 217, 70, 70,
- 70, 222, 213, 211, 221, 214, 212, 225, 216, 220,
-
- 228, 226, 70, 218, 219, 70, 70, 70, 70, 70,
- 229, 223, 70, 70, 224, 70, 227, 231, 232, 70,
- 233, 235, 70, 70, 70, 230, 237, 238, 70, 70,
- 70, 236, 70, 70, 241, 234, 240, 239, 70, 70,
- 247, 242, 243, 70, 245, 244, 70, 70, 70, 248,
- 246, 70, 249, 70, 70, 70, 70, 70, 70, 70,
- 254, 70, 70, 70, 70, 252, 257, 250, 260, 261,
- 265, 70, 264, 258, 70, 70, 255, 251, 259, 256,
- 262, 253, 263, 268, 70, 70, 70, 70, 267, 266,
- 70, 70, 269, 70, 70, 70, 271, 70, 70, 276,
-
- 274, 279, 280, 70, 275, 272, 273, 278, 70, 70,
- 283, 70, 70, 70, 70, 70, 285, 277, 70, 70,
- 70, 70, 281, 70, 282, 284, 289, 287, 70, 290,
- 70, 70, 70, 70, 296, 70, 286, 70, 70, 70,
- 70, 70, 291, 298, 70, 292, 288, 301, 294, 305,
- 293, 307, 70, 295, 70, 70, 299, 297, 303, 300,
- 310, 70, 306, 302, 304, 70, 308, 311, 70, 70,
- 70, 309, 70, 70, 70, 70, 70, 312, 70, 70,
- 313, 70, 70, 70, 316, 318, 319, 320, 70, 70,
- 70, 70, 70, 70, 326, 70, 314, 315, 328, 317,
-
- 70, 70, 330, 322, 70, 323, 70, 334, 70, 70,
- 327, 324, 331, 70, 70, 325, 329, 332, 333, 70,
- 337, 335, 336, 340, 70, 339, 70, 342, 70, 70,
- 343, 70, 70, 338, 70, 70, 70, 341, 70, 347,
- 70, 70, 70, 70, 70, 70, 353, 345, 70, 70,
- 70, 344, 355, 357, 348, 346, 70, 352, 70, 349,
- 350, 351, 354, 70, 356, 70, 70, 70, 365, 70,
- 358, 70, 70, 70, 360, 361, 362, 364, 70, 359,
- 70, 70, 70, 363, 70, 366, 70, 70, 367, 70,
- 369, 373, 368, 70, 374, 376, 375, 371, 372, 370,
-
- 70, 379, 378, 70, 70, 377, 70, 70, 70, 380,
- 70, 70, 70, 383, 70, 382, 381, 70, 386, 70,
- 70, 70, 70, 70, 70, 70, 391, 385, 388, 70,
- 392, 384, 389, 387, 395, 70, 397, 390, 70, 393,
- 70, 70, 70, 70, 70, 70, 70, 70, 70, 398,
- 402, 70, 70, 394, 400, 396, 403, 399, 404, 70,
- 406, 405, 70, 413, 412, 70, 409, 70, 408, 401,
- 407, 411, 70, 414, 70, 70, 70, 70, 70, 70,
- 70, 410, 70, 70, 415, 70, 418, 417, 70, 70,
- 70, 70, 419, 70, 70, 70, 70, 420, 70, 416,
-
- 70, 421, 70, 70, 70, 70, 70, 70, 70, 422,
- 53, 53, 53, 53, 53, 53, 53, 53, 53, 55,
- 55, 55, 55, 55, 55, 55, 55, 55, 57, 57,
- 57, 57, 57, 57, 57, 57, 57, 60, 70, 60,
- 65, 65, 65, 71, 71, 70, 71, 125, 125, 125,
- 125, 70, 125, 125, 125, 125, 126, 126, 126, 126,
- 126, 126, 126, 126, 126, 129, 129, 129, 70, 129,
- 129, 129, 129, 129, 131, 70, 131, 131, 131, 131,
- 131, 131, 131, 135, 70, 70, 70, 70, 70, 135,
- 203, 70, 203, 135, 70, 70, 70, 70, 70, 70,
-
- 70, 70, 70, 70, 70, 70, 70, 321, 70, 70,
- 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
- 70, 70, 70, 70, 270, 70, 70, 70, 70, 70,
- 70, 70, 70, 202, 133, 132, 130, 70, 70, 70,
- 70, 70, 70, 134, 423, 133, 132, 130, 70, 69,
- 66, 62, 61, 423, 58, 58, 56, 56, 54, 54,
- 9, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
-
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423
+ 29, 30, 31, 10, 32, 33, 34, 35, 36, 37,
+ 38, 38, 39, 38, 38, 40, 41, 42, 43, 44,
+ 38, 45, 46, 47, 48, 49, 50, 38, 38, 38,
+ 38, 38, 38, 38, 51, 52, 59, 59, 59, 59,
+ 63, 70, 64, 67, 68, 70, 70, 70, 70, 72,
+ 63, 70, 64, 72, 72, 72, 72, 123, 75, 72,
+ 84, 70, 76, 73, 85, 77, 136, 79, 74, 72,
+ 86, 80, 90, 322, 81, 71, 70, 82, 78, 91,
+
+ 83, 87, 92, 88, 72, 93, 70, 70, 94, 70,
+ 95, 70, 271, 89, 72, 72, 128, 72, 96, 72,
+ 98, 129, 424, 97, 99, 104, 70, 424, 70, 101,
+ 100, 70, 102, 105, 72, 106, 72, 107, 103, 72,
+ 108, 110, 59, 59, 113, 70, 203, 114, 134, 70,
+ 111, 112, 109, 72, 118, 70, 115, 72, 70, 133,
+ 116, 119, 131, 72, 117, 70, 72, 70, 120, 70,
+ 70, 121, 135, 122, 124, 72, 70, 72, 72, 137,
+ 138, 125, 70, 128, 72, 140, 70, 70, 129, 70,
+ 72, 141, 70, 424, 72, 72, 139, 72, 142, 70,
+
+ 72, 144, 150, 70, 70, 143, 70, 72, 134, 70,
+ 145, 72, 72, 133, 72, 152, 146, 72, 70, 131,
+ 147, 148, 156, 69, 153, 66, 72, 70, 149, 151,
+ 70, 154, 70, 155, 70, 72, 70, 62, 72, 158,
+ 72, 70, 72, 70, 72, 157, 70, 159, 70, 72,
+ 70, 72, 61, 424, 72, 70, 72, 161, 72, 58,
+ 160, 70, 162, 72, 163, 164, 70, 165, 70, 72,
+ 70, 70, 168, 70, 72, 58, 72, 170, 72, 72,
+ 169, 72, 166, 167, 70, 172, 70, 70, 56, 171,
+ 174, 56, 72, 70, 72, 72, 173, 54, 70, 175,
+
+ 70, 72, 70, 54, 70, 176, 72, 180, 72, 424,
+ 72, 70, 72, 70, 183, 177, 424, 178, 424, 72,
+ 70, 72, 181, 179, 184, 424, 182, 424, 72, 188,
+ 70, 186, 424, 189, 70, 185, 70, 70, 72, 187,
+ 190, 70, 72, 424, 72, 72, 193, 70, 70, 72,
+ 194, 191, 424, 424, 70, 72, 72, 70, 70, 424,
+ 198, 192, 72, 424, 196, 72, 72, 200, 424, 424,
+ 70, 201, 70, 70, 197, 70, 195, 199, 72, 70,
+ 72, 72, 70, 72, 202, 70, 205, 72, 424, 70,
+ 72, 208, 206, 72, 70, 70, 207, 72, 70, 209,
+
+ 210, 424, 72, 72, 70, 70, 72, 70, 424, 216,
+ 70, 211, 72, 72, 424, 72, 218, 424, 72, 424,
+ 424, 212, 213, 70, 70, 214, 70, 217, 215, 424,
+ 70, 72, 72, 70, 72, 223, 219, 220, 72, 222,
+ 70, 72, 70, 221, 70, 424, 70, 424, 72, 424,
+ 72, 70, 72, 226, 72, 230, 70, 227, 224, 72,
+ 225, 70, 229, 70, 72, 70, 424, 70, 70, 72,
+ 424, 72, 228, 72, 232, 72, 72, 70, 233, 70,
+ 234, 236, 231, 424, 424, 72, 70, 72, 70, 70,
+ 424, 237, 238, 70, 72, 235, 72, 72, 240, 239,
+
+ 70, 72, 242, 70, 424, 70, 70, 243, 72, 70,
+ 424, 72, 241, 72, 72, 70, 70, 72, 246, 70,
+ 244, 70, 70, 72, 72, 245, 248, 72, 249, 72,
+ 72, 247, 70, 70, 70, 70, 70, 70, 70, 250,
+ 72, 72, 72, 72, 72, 72, 72, 255, 70, 424,
+ 251, 70, 253, 70, 424, 424, 72, 252, 70, 72,
+ 424, 72, 256, 258, 70, 257, 72, 424, 254, 70,
+ 70, 70, 72, 259, 261, 262, 70, 72, 72, 72,
+ 260, 70, 70, 424, 72, 266, 263, 265, 70, 72,
+ 72, 70, 424, 70, 264, 70, 72, 70, 70, 72,
+
+ 267, 72, 269, 72, 70, 72, 72, 268, 70, 424,
+ 270, 70, 72, 70, 272, 273, 72, 274, 70, 72,
+ 70, 72, 70, 275, 277, 70, 72, 276, 72, 280,
+ 72, 281, 70, 72, 70, 279, 70, 70, 424, 424,
+ 72, 278, 72, 70, 72, 72, 286, 284, 70, 70,
+ 70, 72, 424, 282, 70, 70, 72, 72, 72, 285,
+ 283, 424, 72, 72, 70, 70, 288, 70, 70, 290,
+ 70, 287, 72, 72, 70, 72, 72, 424, 72, 70,
+ 70, 291, 72, 70, 70, 289, 70, 72, 72, 70,
+ 424, 72, 72, 70, 72, 292, 70, 72, 293, 297,
+
+ 70, 72, 70, 70, 72, 295, 294, 70, 72, 296,
+ 72, 72, 70, 70, 298, 72, 70, 424, 70, 70,
+ 72, 72, 70, 70, 72, 299, 72, 72, 70, 302,
+ 72, 72, 70, 424, 424, 424, 72, 424, 300, 70,
+ 72, 301, 306, 70, 424, 70, 303, 72, 304, 70,
+ 305, 72, 307, 72, 308, 70, 424, 72, 309, 424,
+ 70, 70, 312, 72, 311, 70, 70, 310, 72, 72,
+ 424, 70, 70, 72, 72, 70, 70, 70, 313, 72,
+ 72, 314, 424, 72, 72, 72, 70, 317, 70, 319,
+ 320, 424, 424, 70, 72, 315, 72, 70, 70, 321,
+
+ 316, 72, 70, 318, 70, 72, 72, 70, 70, 70,
+ 72, 424, 72, 424, 424, 72, 72, 72, 424, 70,
+ 70, 323, 327, 70, 70, 70, 324, 72, 72, 424,
+ 329, 72, 72, 72, 70, 325, 328, 331, 70, 326,
+ 424, 70, 72, 70, 70, 70, 72, 332, 330, 72,
+ 70, 72, 72, 72, 335, 70, 424, 424, 72, 70,
+ 333, 70, 70, 72, 334, 336, 337, 72, 424, 72,
+ 72, 70, 70, 70, 70, 70, 338, 424, 70, 72,
+ 72, 72, 72, 72, 424, 340, 72, 70, 70, 341,
+ 339, 424, 343, 70, 70, 72, 72, 70, 424, 344,
+
+ 70, 72, 72, 342, 70, 72, 348, 424, 72, 70,
+ 70, 70, 72, 70, 424, 346, 345, 72, 72, 72,
+ 70, 72, 347, 70, 424, 70, 349, 70, 72, 70,
+ 70, 72, 350, 72, 354, 72, 351, 72, 72, 352,
+ 356, 70, 353, 358, 355, 70, 70, 70, 70, 72,
+ 70, 357, 70, 72, 72, 72, 72, 70, 72, 70,
+ 72, 70, 70, 70, 70, 72, 70, 72, 359, 72,
+ 72, 72, 72, 70, 72, 424, 70, 424, 424, 361,
+ 70, 72, 70, 362, 72, 360, 365, 70, 72, 363,
+ 72, 366, 364, 70, 70, 72, 70, 424, 70, 70,
+
+ 70, 72, 72, 70, 72, 367, 72, 72, 72, 70,
+ 368, 72, 424, 424, 70, 70, 70, 72, 424, 70,
+ 369, 370, 72, 72, 72, 424, 374, 72, 70, 371,
+ 70, 70, 424, 375, 70, 372, 72, 70, 72, 72,
+ 373, 70, 72, 376, 379, 72, 377, 70, 70, 72,
+ 70, 70, 424, 70, 70, 72, 72, 378, 72, 72,
+ 380, 72, 72, 70, 70, 70, 70, 383, 70, 70,
+ 382, 72, 72, 72, 72, 70, 72, 72, 70, 381,
+ 70, 70, 424, 72, 70, 70, 72, 70, 72, 72,
+ 387, 386, 72, 72, 384, 72, 385, 70, 424, 70,
+
+ 70, 424, 70, 424, 389, 72, 388, 72, 72, 390,
+ 72, 70, 70, 70, 70, 392, 70, 424, 424, 72,
+ 72, 72, 72, 393, 72, 391, 396, 424, 70, 70,
+ 70, 70, 70, 70, 70, 394, 72, 72, 72, 72,
+ 72, 72, 72, 70, 398, 70, 70, 395, 70, 70,
+ 70, 72, 70, 72, 72, 424, 72, 72, 72, 424,
+ 72, 399, 403, 397, 404, 70, 70, 400, 70, 401,
+ 70, 424, 70, 72, 72, 70, 72, 70, 72, 405,
+ 72, 402, 70, 72, 424, 72, 424, 70, 70, 70,
+ 72, 70, 406, 424, 407, 72, 72, 72, 70, 72,
+
+ 70, 412, 70, 424, 70, 70, 72, 424, 72, 410,
+ 72, 408, 72, 72, 70, 409, 424, 413, 414, 70,
+ 415, 70, 72, 70, 411, 70, 424, 72, 416, 72,
+ 70, 72, 424, 72, 419, 70, 424, 70, 72, 417,
+ 418, 424, 424, 72, 420, 72, 424, 424, 421, 424,
+ 424, 424, 424, 424, 424, 424, 422, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 423, 53,
+ 53, 53, 53, 53, 53, 53, 53, 53, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 60, 424, 60, 65,
+
+ 65, 65, 71, 71, 424, 71, 126, 126, 126, 126,
+ 424, 126, 126, 126, 126, 127, 127, 127, 127, 127,
+ 127, 127, 127, 127, 130, 130, 130, 424, 130, 130,
+ 130, 130, 130, 132, 424, 132, 132, 132, 132, 132,
+ 132, 132, 136, 424, 424, 424, 424, 424, 136, 72,
+ 72, 424, 72, 204, 424, 204, 9, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424
} ;
-static yyconst flex_int16_t yy_chk[917] =
+static yyconst flex_int16_t yy_chk[1414] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 11, 11, 12, 12, 25,
- 32, 25, 28, 28, 33, 54, 49, 81, 44, 34,
- 54, 36, 422, 35, 49, 64, 33, 64, 41, 81,
- 33, 32, 42, 33, 34, 35, 32, 37, 34, 35,
- 36, 34, 36, 44, 34, 35, 33, 34, 39, 37,
-
- 41, 40, 36, 72, 42, 43, 37, 59, 59, 37,
- 42, 39, 37, 40, 72, 37, 50, 40, 43, 39,
- 45, 47, 46, 40, 39, 415, 43, 404, 43, 47,
- 43, 50, 45, 43, 46, 48, 47, 46, 50, 73,
- 76, 45, 45, 47, 78, 74, 46, 75, 79, 73,
- 46, 80, 78, 82, 46, 84, 48, 85, 48, 74,
- 86, 75, 76, 83, 87, 73, 89, 88, 75, 85,
- 79, 80, 80, 83, 90, 87, 79, 91, 82, 80,
- 93, 84, 83, 88, 92, 98, 86, 95, 97, 83,
- 99, 101, 102, 89, 100, 105, 90, 106, 95, 107,
-
- 99, 93, 91, 98, 111, 100, 92, 105, 106, 97,
- 110, 102, 99, 95, 95, 108, 109, 112, 101, 113,
- 115, 110, 114, 111, 109, 116, 107, 113, 119, 115,
- 120, 108, 124, 117, 111, 118, 112, 108, 121, 109,
- 115, 114, 117, 109, 112, 122, 120, 118, 138, 121,
- 126, 116, 139, 123, 127, 126, 120, 124, 140, 127,
- 138, 143, 119, 141, 122, 123, 142, 144, 139, 123,
- 145, 141, 146, 147, 140, 142, 142, 148, 150, 149,
- 151, 153, 152, 147, 154, 156, 143, 149, 155, 158,
- 159, 153, 146, 144, 152, 146, 145, 156, 148, 151,
-
- 159, 156, 160, 150, 150, 162, 164, 163, 165, 166,
- 160, 154, 167, 170, 155, 168, 158, 163, 164, 169,
- 165, 166, 171, 173, 174, 162, 167, 168, 172, 177,
- 178, 166, 175, 180, 171, 165, 170, 169, 179, 181,
- 178, 172, 173, 182, 175, 174, 183, 184, 185, 179,
- 177, 187, 180, 189, 192, 191, 196, 193, 195, 194,
- 185, 198, 197, 199, 200, 183, 191, 181, 194, 194,
- 197, 204, 196, 192, 201, 207, 187, 182, 193, 189,
- 194, 184, 195, 200, 205, 206, 209, 211, 199, 198,
- 212, 213, 201, 214, 215, 216, 204, 217, 218, 211,
-
- 207, 214, 215, 219, 209, 205, 206, 213, 220, 221,
- 218, 223, 224, 226, 228, 229, 220, 212, 231, 232,
- 235, 242, 216, 233, 217, 219, 226, 223, 234, 228,
- 237, 245, 246, 250, 235, 247, 221, 249, 251, 253,
- 252, 254, 229, 242, 256, 231, 224, 247, 233, 252,
- 232, 254, 257, 234, 255, 258, 245, 237, 250, 246,
- 257, 260, 253, 249, 251, 261, 255, 258, 262, 263,
- 264, 256, 265, 266, 267, 268, 271, 260, 273, 274,
- 261, 275, 277, 278, 264, 266, 267, 268, 279, 280,
- 282, 283, 284, 285, 277, 286, 262, 263, 279, 265,
-
- 287, 288, 282, 271, 292, 273, 294, 286, 297, 298,
- 278, 274, 283, 296, 300, 275, 280, 284, 285, 301,
- 292, 287, 288, 297, 302, 296, 303, 300, 304, 306,
- 301, 305, 307, 294, 310, 309, 312, 298, 313, 305,
- 315, 317, 314, 316, 318, 324, 313, 303, 325, 328,
- 326, 302, 315, 317, 306, 304, 331, 312, 335, 307,
- 309, 310, 314, 336, 316, 333, 337, 339, 335, 341,
- 318, 343, 344, 346, 325, 326, 328, 333, 348, 324,
- 353, 347, 359, 331, 351, 336, 365, 352, 337, 356,
- 341, 347, 339, 354, 348, 352, 351, 344, 346, 343,
-
- 360, 356, 354, 361, 366, 353, 367, 369, 370, 359,
- 373, 374, 376, 365, 378, 361, 360, 380, 369, 379,
- 381, 384, 389, 382, 387, 390, 378, 367, 373, 391,
- 379, 366, 374, 370, 382, 393, 387, 376, 394, 380,
- 401, 406, 405, 396, 403, 398, 407, 408, 410, 389,
- 394, 411, 412, 381, 391, 384, 396, 390, 398, 413,
- 403, 401, 409, 411, 410, 414, 407, 416, 406, 393,
- 405, 409, 418, 412, 417, 419, 420, 421, 402, 400,
- 399, 408, 397, 395, 413, 392, 417, 416, 388, 386,
- 385, 383, 418, 377, 375, 372, 371, 419, 368, 414,
-
- 364, 420, 363, 362, 358, 357, 355, 350, 349, 421,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 425,
+ 1, 1, 1, 1, 1, 1, 11, 11, 12, 12,
+ 25, 32, 25, 28, 28, 33, 38, 35, 49, 32,
+ 64, 34, 64, 33, 38, 35, 49, 49, 33, 34,
+ 35, 36, 33, 32, 35, 33, 322, 34, 32, 36,
+ 35, 34, 37, 271, 34, 37, 39, 34, 33, 37,
+
+ 34, 36, 37, 36, 39, 37, 41, 40, 37, 42,
+ 39, 43, 203, 36, 41, 40, 54, 42, 39, 43,
+ 40, 54, 127, 39, 40, 43, 44, 127, 45, 41,
+ 40, 46, 42, 43, 44, 43, 45, 43, 42, 46,
+ 43, 45, 59, 59, 46, 47, 135, 46, 134, 48,
+ 45, 45, 44, 47, 47, 71, 46, 48, 50, 133,
+ 46, 47, 131, 71, 46, 72, 50, 73, 47, 74,
+ 75, 48, 70, 48, 50, 73, 76, 74, 75, 73,
+ 74, 50, 77, 128, 76, 75, 78, 80, 128, 82,
+ 77, 76, 79, 65, 78, 80, 74, 82, 76, 81,
+
+ 79, 79, 82, 83, 84, 77, 85, 81, 63, 86,
+ 80, 83, 84, 58, 85, 84, 80, 86, 87, 56,
+ 81, 81, 86, 30, 84, 26, 87, 88, 81, 83,
+ 89, 84, 90, 85, 91, 88, 92, 24, 89, 88,
+ 90, 93, 91, 94, 92, 87, 95, 89, 97, 93,
+ 98, 94, 23, 9, 95, 96, 97, 91, 98, 8,
+ 90, 99, 92, 96, 93, 94, 100, 96, 102, 99,
+ 103, 104, 98, 101, 100, 7, 102, 100, 103, 104,
+ 99, 101, 96, 96, 105, 101, 106, 108, 6, 100,
+ 103, 5, 105, 107, 106, 108, 102, 4, 110, 106,
+
+ 109, 107, 111, 3, 112, 107, 110, 110, 109, 0,
+ 111, 113, 112, 115, 111, 108, 0, 109, 0, 113,
+ 114, 115, 110, 109, 112, 0, 110, 0, 114, 114,
+ 116, 113, 0, 115, 117, 112, 118, 119, 116, 113,
+ 116, 120, 117, 0, 118, 119, 118, 121, 122, 120,
+ 119, 116, 0, 0, 124, 121, 122, 123, 125, 0,
+ 122, 117, 124, 0, 121, 123, 125, 124, 0, 0,
+ 137, 124, 138, 139, 121, 140, 120, 123, 137, 141,
+ 138, 139, 142, 140, 125, 144, 139, 141, 0, 143,
+ 142, 142, 140, 144, 145, 146, 141, 143, 148, 143,
+
+ 143, 0, 145, 146, 147, 149, 148, 150, 0, 148,
+ 151, 144, 147, 149, 0, 150, 150, 0, 151, 0,
+ 0, 145, 146, 152, 154, 147, 153, 149, 147, 0,
+ 155, 152, 154, 156, 153, 154, 151, 151, 155, 153,
+ 157, 156, 158, 152, 159, 0, 161, 0, 157, 0,
+ 158, 160, 159, 157, 161, 161, 162, 157, 155, 160,
+ 156, 163, 160, 164, 162, 165, 0, 166, 167, 163,
+ 0, 164, 159, 165, 164, 166, 167, 168, 165, 170,
+ 166, 167, 163, 0, 0, 168, 169, 170, 171, 172,
+ 0, 167, 168, 173, 169, 166, 171, 172, 170, 169,
+
+ 174, 173, 172, 175, 0, 176, 177, 173, 174, 178,
+ 0, 175, 171, 176, 177, 179, 180, 178, 176, 181,
+ 174, 182, 183, 179, 180, 175, 179, 181, 180, 182,
+ 183, 178, 184, 185, 186, 187, 188, 189, 190, 181,
+ 184, 185, 186, 187, 188, 189, 190, 186, 191, 0,
+ 182, 192, 184, 193, 0, 0, 191, 183, 194, 192,
+ 0, 193, 188, 192, 195, 190, 194, 0, 185, 196,
+ 197, 199, 195, 193, 195, 195, 198, 196, 197, 199,
+ 194, 200, 201, 0, 198, 198, 195, 197, 205, 200,
+ 201, 202, 0, 206, 196, 207, 205, 208, 209, 202,
+
+ 199, 206, 201, 207, 211, 208, 209, 200, 210, 0,
+ 202, 212, 211, 213, 205, 206, 210, 207, 214, 212,
+ 215, 213, 216, 208, 212, 217, 214, 210, 215, 215,
+ 216, 216, 218, 217, 219, 214, 220, 221, 0, 0,
+ 218, 213, 219, 222, 220, 221, 221, 219, 223, 224,
+ 225, 222, 0, 217, 226, 227, 223, 224, 225, 220,
+ 218, 0, 226, 227, 228, 229, 224, 230, 231, 227,
+ 232, 222, 228, 229, 233, 230, 231, 0, 232, 234,
+ 237, 229, 233, 235, 236, 225, 238, 234, 237, 239,
+ 0, 235, 236, 240, 238, 230, 241, 239, 232, 236,
+
+ 242, 240, 243, 244, 241, 234, 233, 245, 242, 235,
+ 243, 244, 246, 247, 238, 245, 248, 0, 249, 250,
+ 246, 247, 251, 252, 248, 243, 249, 250, 254, 248,
+ 251, 252, 253, 0, 0, 0, 254, 0, 246, 257,
+ 253, 247, 253, 255, 0, 256, 250, 257, 251, 259,
+ 252, 255, 254, 256, 255, 258, 0, 259, 256, 0,
+ 260, 261, 259, 258, 258, 262, 263, 257, 260, 261,
+ 0, 264, 265, 262, 263, 266, 267, 268, 261, 264,
+ 265, 262, 0, 266, 267, 268, 269, 265, 270, 267,
+ 268, 0, 0, 272, 269, 263, 270, 273, 274, 269,
+
+ 264, 272, 275, 266, 276, 273, 274, 277, 279, 278,
+ 275, 0, 276, 0, 0, 277, 279, 278, 0, 280,
+ 281, 272, 278, 282, 283, 284, 274, 280, 281, 0,
+ 280, 282, 283, 284, 285, 275, 279, 283, 286, 276,
+ 0, 287, 285, 288, 289, 290, 286, 284, 281, 287,
+ 291, 288, 289, 290, 287, 292, 0, 0, 291, 293,
+ 285, 294, 295, 292, 286, 288, 289, 293, 0, 294,
+ 295, 296, 297, 298, 299, 300, 293, 0, 301, 296,
+ 297, 298, 299, 300, 0, 297, 301, 302, 303, 298,
+ 295, 0, 301, 304, 305, 302, 303, 306, 0, 302,
+
+ 307, 304, 305, 299, 308, 306, 306, 0, 307, 309,
+ 310, 311, 308, 312, 0, 304, 303, 309, 310, 311,
+ 313, 312, 305, 315, 0, 314, 307, 316, 313, 317,
+ 318, 315, 308, 314, 314, 316, 310, 317, 318, 311,
+ 316, 319, 313, 318, 315, 320, 321, 323, 324, 319,
+ 325, 317, 326, 320, 321, 323, 324, 327, 325, 328,
+ 326, 329, 330, 331, 332, 327, 333, 328, 319, 329,
+ 330, 331, 332, 334, 333, 0, 335, 0, 0, 326,
+ 336, 334, 337, 327, 335, 325, 334, 338, 336, 329,
+ 337, 336, 332, 339, 340, 338, 341, 0, 343, 342,
+
+ 344, 339, 340, 346, 341, 337, 343, 342, 344, 345,
+ 338, 346, 0, 0, 347, 348, 349, 345, 0, 350,
+ 340, 342, 347, 348, 349, 0, 348, 350, 351, 344,
+ 352, 354, 0, 349, 355, 345, 351, 353, 352, 354,
+ 347, 356, 355, 352, 355, 353, 353, 357, 358, 356,
+ 359, 360, 0, 361, 362, 357, 358, 354, 359, 360,
+ 357, 361, 362, 363, 364, 365, 366, 362, 367, 368,
+ 361, 363, 364, 365, 366, 369, 367, 368, 370, 360,
+ 371, 372, 0, 369, 373, 374, 370, 375, 371, 372,
+ 370, 368, 373, 374, 366, 375, 367, 376, 0, 377,
+
+ 378, 0, 379, 0, 374, 376, 371, 377, 378, 375,
+ 379, 380, 381, 382, 383, 379, 384, 0, 0, 380,
+ 381, 382, 383, 380, 384, 377, 383, 0, 385, 386,
+ 387, 388, 389, 390, 391, 381, 385, 386, 387, 388,
+ 389, 390, 391, 392, 388, 393, 394, 382, 396, 395,
+ 397, 392, 398, 393, 394, 0, 396, 395, 397, 0,
+ 398, 390, 395, 385, 397, 399, 400, 391, 401, 392,
+ 402, 0, 403, 399, 400, 405, 401, 404, 402, 399,
+ 403, 394, 406, 405, 0, 404, 0, 407, 408, 409,
+ 406, 410, 402, 0, 404, 407, 408, 409, 413, 410,
+
+ 411, 410, 414, 0, 415, 412, 413, 0, 411, 408,
+ 414, 406, 415, 412, 416, 407, 0, 411, 412, 417,
+ 413, 418, 416, 419, 409, 420, 0, 417, 414, 418,
+ 421, 419, 0, 420, 418, 422, 0, 423, 421, 415,
+ 417, 0, 0, 422, 419, 423, 0, 0, 420, 0,
+ 0, 0, 0, 0, 0, 0, 421, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 422, 425,
425, 425, 425, 425, 425, 425, 425, 425, 426, 426,
- 426, 426, 426, 426, 426, 426, 426, 427, 345, 427,
- 428, 428, 428, 429, 429, 342, 429, 430, 430, 430,
- 430, 340, 430, 430, 430, 430, 431, 431, 431, 431,
- 431, 431, 431, 431, 431, 432, 432, 432, 338, 432,
- 432, 432, 432, 432, 433, 334, 433, 433, 433, 433,
- 433, 433, 433, 434, 332, 330, 329, 327, 323, 434,
- 435, 322, 435, 321, 320, 319, 311, 308, 299, 295,
-
- 293, 291, 290, 289, 281, 276, 272, 270, 269, 259,
- 248, 244, 243, 241, 240, 239, 238, 236, 230, 227,
- 225, 222, 210, 208, 202, 190, 188, 186, 176, 161,
- 157, 137, 136, 134, 133, 132, 130, 104, 103, 96,
- 94, 77, 71, 70, 65, 63, 58, 56, 38, 30,
- 26, 24, 23, 9, 8, 7, 6, 5, 4, 3,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
-
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423
+ 426, 426, 426, 426, 426, 426, 426, 427, 427, 427,
+ 427, 427, 427, 427, 427, 427, 428, 0, 428, 429,
+
+ 429, 429, 430, 430, 0, 430, 431, 431, 431, 431,
+ 0, 431, 431, 431, 431, 432, 432, 432, 432, 432,
+ 432, 432, 432, 432, 433, 433, 433, 0, 433, 433,
+ 433, 433, 433, 434, 0, 434, 434, 434, 434, 434,
+ 434, 434, 435, 0, 0, 0, 0, 0, 435, 436,
+ 436, 0, 436, 437, 0, 437, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424
} ;
static yy_state_type yy_last_accepting_state;
@@ -847,6 +966,7 @@ Created 12/14/1997 Heikki Tuuri
#define realloc(P, A) ut_realloc(P, A)
#define exit(A) ut_error
+/* Note: We cast &result to int* from yysize_t* */
#define YY_INPUT(buf, result, max_size) \
(result = pars_get_lex_chars(buf, max_size))
@@ -883,7 +1003,7 @@ string_append(
-#line 887 "lexyy.cc"
+#line 1006 "lexyy.cc"
#define INITIAL 0
#define comment 1
@@ -965,7 +1085,12 @@ static int input (void );
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
#endif
/* Copy whatever the last rule matched to the standard output. */
@@ -973,7 +1098,7 @@ static int input (void );
/* This used to be an fputs(), but since the string might contain NUL's,
* we now use fwrite().
*/
-#define ECHO fwrite( yytext, yyleng, 1, yyout )
+#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
#endif
/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
@@ -984,7 +1109,7 @@ static int input (void );
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
{ \
int c = '*'; \
- yy_size_t n; \
+ size_t n; \
for ( n = 0; n < max_size && \
(c = getc( yyin )) != EOF && c != '\n'; ++n ) \
buf[n] = (char) c; \
@@ -1069,7 +1194,7 @@ YY_DECL
#line 112 "pars0lex.l"
-#line 1073 "lexyy.cc"
+#line 1197 "lexyy.cc"
if ( !(yy_init) )
{
@@ -1122,13 +1247,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 423 );
+ while ( yy_current_state != 424 );
yy_cp = (yy_last_accepting_cpos);
yy_current_state = (yy_last_accepting_state);
@@ -2109,7 +2234,7 @@ YY_RULE_SETUP
#line 691 "pars0lex.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 2113 "lexyy.cc"
+#line 2237 "lexyy.cc"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(comment):
case YY_STATE_EOF(quoted):
@@ -2299,7 +2424,7 @@ static int yy_get_next_buffer (void)
else
{
- yy_size_t num_to_read =
+ int num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
while ( num_to_read <= 0 )
@@ -2313,16 +2438,16 @@ static int yy_get_next_buffer (void)
if ( b->yy_is_our_buffer )
{
- yy_size_t new_size = b->yy_buf_size * 2;
+ int new_size = b->yy_buf_size * 2;
if ( new_size <= 0 )
b->yy_buf_size += b->yy_buf_size / 8;
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- yyrealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 );
+ yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
}
else
/* Can't grow it, we don't own it. */
@@ -2344,7 +2469,7 @@ static int yy_get_next_buffer (void)
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), num_to_read );
+ (yy_n_chars), (size_t) num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
@@ -2371,7 +2496,7 @@ static int yy_get_next_buffer (void)
if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) yyrealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -2387,7 +2512,7 @@ static int yy_get_next_buffer (void)
/* yy_get_previous_state - get the state just before the EOB char was reached */
- static yy_state_type yy_get_previous_state (void)
+ yy_state_type yy_get_previous_state (void)
{
register yy_state_type yy_current_state;
register char *yy_cp;
@@ -2405,7 +2530,7 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -2419,7 +2544,7 @@ static int yy_get_next_buffer (void)
* synopsis
* next_state = yy_try_NUL_trans( current_state );
*/
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
{
register int yy_is_jam;
register char *yy_cp = (yy_c_buf_p);
@@ -2433,11 +2558,11 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 423);
+ yy_is_jam = (yy_current_state == 424);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -2466,7 +2591,7 @@ static int yy_get_next_buffer (void)
else
{ /* need more input */
- yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+ int offset = (int)((yy_c_buf_p) - (yytext_ptr));
++(yy_c_buf_p);
switch ( yy_get_next_buffer( ) )
@@ -2490,7 +2615,7 @@ static int yy_get_next_buffer (void)
case EOB_ACT_END_OF_FILE:
{
if ( yywrap( ) )
- return 0;
+ return EOF;
if ( ! (yy_did_buffer_switch_on_eof) )
YY_NEW_FILE;
@@ -2508,7 +2633,7 @@ static int yy_get_next_buffer (void)
}
}
- c = *(unsigned char*) (yy_c_buf_p); /* cast for 8-bit char's */
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
*(yy_c_buf_p) = '\0'; /* preserve yytext */
(yy_hold_char) = *++(yy_c_buf_p);
@@ -2518,7 +2643,7 @@ static int yy_get_next_buffer (void)
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void yyrestart (FILE * input_file )
@@ -2536,7 +2661,7 @@ static int yy_get_next_buffer (void)
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
__attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
@@ -2580,7 +2705,7 @@ static void yy_load_buffer_state (void)
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
@@ -2596,7 +2721,7 @@ static void yy_load_buffer_state (void)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) yyalloc(b->yy_buf_size + 2 );
+ b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
@@ -2609,9 +2734,9 @@ static void yy_load_buffer_state (void)
/** Destroy the buffer.
* @param b a buffer created with yy_create_buffer()
- *
+ *
*/
- void yy_delete_buffer (YY_BUFFER_STATE b )
+ void yy_delete_buffer (YY_BUFFER_STATE b )
{
if ( ! b )
@@ -2621,20 +2746,20 @@ static void yy_load_buffer_state (void)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- yyfree((void*) b->yy_ch_buf );
+ yyfree((void *) b->yy_ch_buf );
- yyfree((void*) b );
+ yyfree((void *) b );
}
/* Initializes or reinitializes a buffer.
* This function is sometimes called more than once on the same buffer,
* such as during a yyrestart() or at EOF.
*/
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
{
int oerrno = errno;
-
+
yy_flush_buffer(b );
b->yy_input_file = file;
@@ -2650,13 +2775,13 @@ static void yy_load_buffer_state (void)
}
b->yy_is_interactive = 0;
-
+
errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void yy_flush_buffer (YY_BUFFER_STATE b )
{
@@ -2685,7 +2810,7 @@ static void yy_load_buffer_state (void)
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
{
@@ -2715,7 +2840,7 @@ void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void yypop_buffer_state (void)
{
@@ -2738,8 +2863,8 @@ void yypop_buffer_state (void)
*/
static void yyensure_buffer_stack (void)
{
- yy_size_t num_to_alloc;
-
+ int num_to_alloc;
+
if (!(yy_buffer_stack)) {
/* First allocation is just for 2 elements, since we don't know if this
@@ -2747,7 +2872,7 @@ static void yyensure_buffer_stack (void)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**) yyalloc
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
);
if ( ! (yy_buffer_stack) )
@@ -2766,7 +2891,7 @@ static void yyensure_buffer_stack (void)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = (yy_buffer_stack_max) + grow_size;
- (yy_buffer_stack) = (struct yy_buffer_state**) yyrealloc
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
((yy_buffer_stack),
num_to_alloc * sizeof(struct yy_buffer_state*)
);
@@ -2809,7 +2934,7 @@ static void yy_fatal_error (yyconst char* msg )
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int yyget_lineno (void)
{
@@ -2818,7 +2943,7 @@ int yyget_lineno (void)
}
/** Get the input stream.
- *
+ *
*/
FILE *yyget_in (void)
{
@@ -2826,7 +2951,7 @@ FILE *yyget_in (void)
}
/** Get the output stream.
- *
+ *
*/
FILE *yyget_out (void)
{
@@ -2834,7 +2959,7 @@ FILE *yyget_out (void)
}
/** Get the length of the current token.
- *
+ *
*/
yy_size_t yyget_leng (void)
{
@@ -2842,7 +2967,7 @@ yy_size_t yyget_leng (void)
}
/** Get the current token.
- *
+ *
*/
char *yyget_text (void)
@@ -2852,18 +2977,18 @@ char *yyget_text (void)
/** Set the current line number.
* @param line_number
- *
+ *
*/
void yyset_lineno (int line_number )
{
-
+
yylineno = line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param in_str A readable stream.
- *
+ *
* @see yy_switch_to_buffer
*/
void yyset_in (FILE * in_str )
@@ -2895,7 +3020,7 @@ static int yy_init_globals (void)
(yy_buffer_stack) = 0;
(yy_buffer_stack_top) = 0;
(yy_buffer_stack_max) = 0;
- (yy_c_buf_p) = (char*) 0;
+ (yy_c_buf_p) = (char *) 0;
(yy_init) = 0;
(yy_start) = 0;
@@ -2904,8 +3029,8 @@ static int yy_init_globals (void)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -2917,7 +3042,7 @@ static int yy_init_globals (void)
/* yylex_destroy is for both reentrant and non-reentrant scanners. */
__attribute__((unused)) static int yylex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
yy_delete_buffer(YY_CURRENT_BUFFER );
@@ -2962,24 +3087,24 @@ static int yy_flex_strlen (yyconst char * s )
void *yyalloc (yy_size_t size )
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
void *yyrealloc (void * ptr, yy_size_t size )
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
void yyfree (void * ptr )
{
- free( (char*) ptr ); /* see yyrealloc() for (char*) cast */
+ free( (char*) ptr ); /* see yyrealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l
index 2446e40cde8..83c3af4b6c5 100644
--- a/storage/innobase/pars/pars0lex.l
+++ b/storage/innobase/pars/pars0lex.l
@@ -102,7 +102,7 @@ string_append(
DIGIT [0-9]
ID [a-z_A-Z][a-z_A-Z0-9]*
-TABLE_NAME [a-z_A-Z][a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]*
+TABLE_NAME [a-z_A-Z][@a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]*
BOUND_LIT \:[a-z_A-Z0-9]+
BOUND_ID \$[a-z_A-Z0-9]+
diff --git a/storage/innobase/pars/pars0opt.cc b/storage/innobase/pars/pars0opt.cc
index e5f347eedd6..cbed2b39eeb 100644
--- a/storage/innobase/pars/pars0opt.cc
+++ b/storage/innobase/pars/pars0opt.cc
@@ -345,7 +345,7 @@ opt_calc_index_goodness(
/* At least for now we don't support using FTS indexes for queries
done through InnoDB's own SQL parser. */
- if (index->type == DICT_FTS) {
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
return(0);
}
@@ -400,7 +400,7 @@ opt_calc_index_goodness(
}
}
- /* We have to test for goodness here, as last_op may note be set */
+ /* We have to test for goodness here, as last_op may not be set */
if (goodness && dict_index_is_clust(index)) {
goodness++;
diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc
index a4ab85adc36..f82610e62d0 100644
--- a/storage/innobase/pars/pars0pars.cc
+++ b/storage/innobase/pars/pars0pars.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -859,7 +859,8 @@ pars_retrieve_table_def(
sym_node->resolved = TRUE;
sym_node->token_type = SYM_TABLE_REF_COUNTED;
- sym_node->table = dict_table_open_on_name(sym_node->name, TRUE);
+ sym_node->table = dict_table_open_on_name(
+ sym_node->name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
ut_a(sym_node->table != NULL);
}
@@ -1115,8 +1116,8 @@ pars_function_declaration(
sym_node->token_type = SYM_FUNCTION;
/* Check that the function exists. */
- ut_a(pars_info_get_user_func(pars_sym_tab_global->info,
- sym_node->name));
+ ut_a(pars_info_lookup_user_func(
+ pars_sym_tab_global->info, sym_node->name));
return(sym_node);
}
@@ -1782,8 +1783,9 @@ pars_fetch_statement(
} else {
pars_resolve_exp_variables_and_types(NULL, user_func);
- node->func = pars_info_get_user_func(pars_sym_tab_global->info,
- user_func->name);
+ node->func = pars_info_lookup_user_func(
+ pars_sym_tab_global->info, user_func->name);
+
ut_a(node->func);
node->into_list = NULL;
@@ -1941,9 +1943,23 @@ pars_create_table(
const dtype_t* dtype;
ulint n_cols;
ulint flags = 0;
+ ulint flags2 = 0;
if (compact != NULL) {
+
+ /* System tables currently only use the REDUNDANT row
+ format therefore the check for srv_file_per_table should be
+ safe for now. */
+
flags |= DICT_TF_COMPACT;
+
+ /* FIXME: Ideally this should be part of the SQL syntax
+ or use some other mechanism. We want to reduce dependency
+ on global variables. There is an inherent race here but
+ that has always existed around this variable. */
+ if (srv_file_per_table) {
+ flags2 |= DICT_TF2_USE_TABLESPACE;
+ }
}
if (block_size != NULL) {
@@ -1974,10 +1990,8 @@ pars_create_table(
n_cols = que_node_list_get_len(column_defs);
- /* As the InnoDB SQL parser is for internal use only,
- for creating some system tables, this function will only
- create tables in the old (not compact) record format. */
- table = dict_mem_table_create(table_sym->name, 0, n_cols, flags, 0);
+ table = dict_mem_table_create(
+ table_sym->name, 0, n_cols, flags, flags2);
#ifdef UNIV_DEBUG
if (not_fit_in_memory != NULL) {
@@ -1998,7 +2012,7 @@ pars_create_table(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = tab_create_graph_create(table, pars_sym_tab_global->heap);
+ node = tab_create_graph_create(table, pars_sym_tab_global->heap, true);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2052,7 +2066,7 @@ pars_create_index(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = ind_create_graph_create(index, pars_sym_tab_global->heap);
+ node = ind_create_graph_create(index, pars_sym_tab_global->heap, true);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2251,7 +2265,7 @@ que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
que_node_t* node, /*!< in: root node for an incomplete
- query graph */
+ query graph, or NULL for dummy graph */
trx_t* trx, /*!< in: transaction handle */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
@@ -2265,7 +2279,9 @@ pars_complete_graph_for_exec(
thr->child = node;
- que_node_set_parent(node, thr);
+ if (node) {
+ que_node_set_parent(node, thr);
+ }
trx->graph = NULL;
@@ -2478,7 +2494,7 @@ pars_info_bind_int8_literal(
const char* name, /* in: name */
const ib_uint64_t* val) /* in: value */
{
- pars_bound_lit_t* pbl;
+ pars_bound_lit_t* pbl;
pbl = pars_info_lookup_bound_lit(info, name);
@@ -2519,6 +2535,33 @@ pars_info_add_ull_literal(
}
/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_ull_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val) /*!< in: value */
+{
+ pars_bound_lit_t* pbl;
+
+ pbl = pars_info_lookup_bound_lit(info, name);
+
+ if (!pbl) {
+ pars_info_add_literal(
+ info, name, val, sizeof(*val), DATA_FIXBINARY, 0);
+ } else {
+
+ pbl->address = val;
+ pbl->length = sizeof(*val);
+
+ sym_tab_rebind_lit(pbl->node, val, sizeof(*val));
+ }
+}
+
+/****************************************************************//**
Add user function. */
UNIV_INTERN
void
@@ -2605,19 +2648,6 @@ pars_info_get_bound_id(
}
/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name) /*!< in: function name to find*/
-{
- return(pars_info_lookup_user_func(info, name));
-}
-
-/****************************************************************//**
Get bound literal with the given name.
@return bound literal, or NULL if not found */
UNIV_INTERN
diff --git a/storage/innobase/pars/pars0sym.cc b/storage/innobase/pars/pars0sym.cc
index c71ad8a6b39..b01a69cb33a 100644
--- a/storage/innobase/pars/pars0sym.cc
+++ b/storage/innobase/pars/pars0sym.cc
@@ -84,7 +84,7 @@ sym_tab_free_private(
if (sym->token_type == SYM_TABLE_REF_COUNTED) {
- dict_table_close(sym->table, TRUE);
+ dict_table_close(sym->table, TRUE, FALSE);
sym->table = NULL;
sym->resolved = FALSE;
diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc
index c023723685c..fb185959d56 100644
--- a/storage/innobase/que/que0que.cc
+++ b/storage/innobase/que/que0que.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1248,7 +1248,7 @@ loop:
Evaluate the given SQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
que_eval_sql(
/*=========*/
pars_info_t* info, /*!< in: info struct, or NULL */
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index 02d78d657c6..14dc9ee5e7f 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -174,59 +174,6 @@ The order does not matter. No new transactions can be created and no running
transaction can commit or rollback (or free views).
*/
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates a read view object. */
-static
-ibool
-read_view_validate(
-/*===============*/
- const read_view_t* view) /*!< in: view to validate */
-{
- ulint i;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- /* Check that the view->trx_ids array is in descending order. */
- for (i = 1; i < view->n_trx_ids; ++i) {
-
- ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
- }
-
- return(TRUE);
-}
-
-/** Functor to validate the view list. */
-struct Check {
-
- Check() : m_prev_view(0) { }
-
- void operator()(const read_view_t* view)
- {
- ut_a(m_prev_view == NULL
- || m_prev_view->low_limit_no >= view->low_limit_no);
-
- m_prev_view = view;
- }
-
- const read_view_t* m_prev_view;
-};
-
-/*********************************************************************//**
-Validates a read view list. */
-static
-ibool
-read_view_list_validate(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_list_map(trx_sys->view_list, &read_view_t::view_list, Check());
-
- return(TRUE);
-}
-#endif
-
/*********************************************************************//**
Creates a read view object.
@return own: read view struct */
@@ -530,25 +477,6 @@ read_view_purge_open(
}
/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INTERN
-void
-read_view_remove(
-/*=============*/
- read_view_t* view) /*!< in: read view */
-{
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(read_view_validate(view));
-
- UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-
- ut_ad(read_view_list_validate());
-
- mutex_exit(&trx_sys->mutex);
-}
-
-/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
UNIV_INTERN
@@ -559,7 +487,7 @@ read_view_close_for_mysql(
{
ut_a(trx->global_read_view);
- read_view_remove(trx->global_read_view);
+ read_view_remove(trx->global_read_view, false);
mem_heap_empty(trx->global_read_view_heap);
@@ -692,7 +620,7 @@ read_cursor_view_close_for_mysql(
belong to this transaction */
trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
- read_view_remove(curview->read_view);
+ read_view_remove(curview->read_view, false);
trx->read_view = trx->global_read_view;
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index 19f5633953a..db0fdf3ee21 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@ Created 7/1/1994 Heikki Tuuri
#endif
#include "ha_prototypes.h"
+#include "handler0alter.h"
#include "srv0srv.h"
/* ALPHABETICAL ORDER
@@ -69,10 +70,12 @@ cmp_debug_dtuple_rec_with_match(
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields);/*!< in/out: number of already
+ ulint n_cmp, /*!< in: number of fields to compare */
+ ulint* matched_fields)/*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
This function is used to compare two data fields for which the data type
@@ -621,14 +624,15 @@ respectively, when only the common first fields are compared, or until
the first externally stored field in rec */
UNIV_INTERN
int
-cmp_dtuple_rec_with_match(
-/*======================*/
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n_cmp, /*!< in: number of fields to compare */
ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
@@ -652,7 +656,7 @@ cmp_dtuple_rec_with_match(
ulint cur_field; /* current field number */
ulint cur_bytes; /* number of already matched bytes
in current field */
- int ret = 3333; /* return value */
+ int ret; /* return value */
ut_ad(dtuple && rec && matched_fields && matched_bytes);
ut_ad(dtuple_check_typed(dtuple));
@@ -661,7 +665,9 @@ cmp_dtuple_rec_with_match(
cur_field = *matched_fields;
cur_bytes = *matched_bytes;
- ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
+ ut_ad(n_cmp > 0);
+ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
+ ut_ad(cur_field <= n_cmp);
ut_ad(cur_field <= rec_offs_n_fields(offsets));
if (cur_bytes == 0 && cur_field == 0) {
@@ -681,7 +687,7 @@ cmp_dtuple_rec_with_match(
/* Match fields in a loop; stop if we run out of fields in dtuple
or find an externally stored field */
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+ while (cur_field < n_cmp) {
ulint mtype;
ulint prtype;
@@ -838,7 +844,7 @@ next_field:
order_resolved:
ut_ad((ret >= - 1) && (ret <= 1));
ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields));
+ n_cmp, matched_fields));
ut_ad(*matched_fields == cur_field); /* In the debug version, the
above cmp_debug_... sets
*matched_fields to a value */
@@ -909,156 +915,181 @@ cmp_dtuple_is_prefix_of_rec(
}
/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
-UNIV_INTERN
+Compare two physical record fields.
+@retval 1 if rec1 field is greater than rec2
+@retval -1 if rec1 field is less than rec2
+@retval 0 if rec1 field equals to rec2 */
+static __attribute__((nonnull, warn_unused_result))
int
-cmp_rec_rec_simple(
-/*===============*/
+cmp_rec_rec_simple_field(
+/*=====================*/
const rec_t* rec1, /*!< in: physical record */
const rec_t* rec2, /*!< in: physical record */
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
- ibool* null_eq)/*!< out: set to TRUE if
- found matching null values */
+ ulint n) /*!< in: field to compare */
{
- ulint rec1_f_len; /*!< length of current field in rec1 */
- const byte* rec1_b_ptr; /*!< pointer to the current byte
- in rec1 field */
- ulint rec1_byte; /*!< value of current byte to be
- compared in rec1 */
- ulint rec2_f_len; /*!< length of current field in rec2 */
- const byte* rec2_b_ptr; /*!< pointer to the current byte
- in rec2 field */
- ulint rec2_byte; /*!< value of current byte to be
- compared in rec2 */
- ulint cur_field; /*!< current field number */
- ulint n_uniq;
-
- n_uniq = dict_index_get_n_unique(index);
- ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
- ut_ad(rec_offs_n_fields(offsets2) >= n_uniq);
-
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
+ const byte* rec1_b_ptr;
+ const byte* rec2_b_ptr;
+ ulint rec1_f_len;
+ ulint rec2_f_len;
+ const dict_col_t* col = dict_index_get_nth_col(index, n);
- for (cur_field = 0; cur_field < n_uniq; cur_field++) {
+ ut_ad(!rec_offs_nth_extern(offsets1, n));
+ ut_ad(!rec_offs_nth_extern(offsets2, n));
- ulint cur_bytes;
- ulint mtype;
- ulint prtype;
-
- {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
+ rec1_b_ptr = rec_get_nth_field(rec1, offsets1, n, &rec1_f_len);
+ rec2_b_ptr = rec_get_nth_field(rec2, offsets2, n, &rec2_f_len);
- mtype = col->mtype;
- prtype = col->prtype;
+ if (rec1_f_len == UNIV_SQL_NULL || rec2_f_len == UNIV_SQL_NULL) {
+ if (rec1_f_len == rec2_f_len) {
+ return(0);
}
+ /* We define the SQL null to be the smallest possible
+ value of a field in the alphabetical order */
+ return(rec1_f_len == UNIV_SQL_NULL ? -1 : 1);
+ }
- ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
- ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
+ if (col->mtype >= DATA_FLOAT
+ || (col->mtype == DATA_BLOB
+ && !(col->prtype & DATA_BINARY_TYPE)
+ && dtype_get_charset_coll(col->prtype)
+ != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
+ return(cmp_whole_field(col->mtype, col->prtype,
+ rec1_b_ptr, (unsigned) rec1_f_len,
+ rec2_b_ptr, (unsigned) rec2_f_len));
+ }
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
+ /* Compare the fields */
+ for (ulint cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
+ ulint rec1_byte;
+ ulint rec2_byte;
- if (rec1_f_len == rec2_f_len) {
- if (null_eq) {
- *null_eq = TRUE;
- }
+ if (rec2_f_len <= cur_bytes) {
+ if (rec1_f_len <= cur_bytes) {
+ return(0);
+ }
- goto next_field;
+ rec2_byte = dtype_get_pad_char(
+ col->mtype, col->prtype);
- } else if (rec2_f_len == UNIV_SQL_NULL) {
+ if (rec2_byte == ULINT_UNDEFINED) {
+ return(1);
+ }
+ } else {
+ rec2_byte = *rec2_b_ptr;
+ }
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
+ if (rec1_f_len <= cur_bytes) {
+ rec1_byte = dtype_get_pad_char(
+ col->mtype, col->prtype);
- return(1);
- } else {
+ if (rec1_byte == ULINT_UNDEFINED) {
return(-1);
}
+ } else {
+ rec1_byte = *rec1_b_ptr;
}
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
- int ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret) {
- return(ret);
- }
+ if (rec1_byte == rec2_byte) {
+ /* If the bytes are equal, they will remain such
+ even after the collation transformation below */
+ continue;
+ }
- goto next_field;
+ if (col->mtype <= DATA_CHAR
+ || (col->mtype == DATA_BLOB
+ && !(col->prtype & DATA_BINARY_TYPE))) {
+
+ rec1_byte = cmp_collate(rec1_byte);
+ rec2_byte = cmp_collate(rec2_byte);
}
- /* Compare the fields */
- for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
- if (rec2_f_len <= cur_bytes) {
+ if (rec1_byte < rec2_byte) {
+ return(-1);
+ } else if (rec1_byte > rec2_byte) {
+ return(1);
+ }
+ }
+}
- if (rec1_f_len <= cur_bytes) {
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const dict_index_t* index, /*!< in: data dictionary index */
+ struct TABLE* table) /*!< in: MySQL table, for reporting
+ duplicate key value if applicable,
+ or NULL */
+{
+ ulint n;
+ ulint n_uniq = dict_index_get_n_unique(index);
+ bool null_eq = false;
- goto next_field;
- }
+ ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
+ ut_ad(rec_offs_n_fields(offsets2) == rec_offs_n_fields(offsets2));
- rec2_byte = dtype_get_pad_char(mtype, prtype);
+ ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
- if (rec2_byte == ULINT_UNDEFINED) {
- return(1);
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
+ for (n = 0; n < n_uniq; n++) {
+ int cmp = cmp_rec_rec_simple_field(
+ rec1, rec2, offsets1, offsets2, index, n);
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
+ if (cmp) {
+ return(cmp);
+ }
- if (rec1_byte == ULINT_UNDEFINED) {
- return(-1);
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
+ /* If the fields are internally equal, they must both
+ be NULL or non-NULL. */
+ ut_ad(rec_offs_nth_sql_null(offsets1, n)
+ == rec_offs_nth_sql_null(offsets2, n));
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
+ if (rec_offs_nth_sql_null(offsets1, n)) {
+ ut_ad(!(dict_index_get_nth_col(index, n)->prtype
+ & DATA_NOT_NULL));
+ null_eq = true;
+ }
+ }
- continue;
- }
+ /* If we ran out of fields, the ordering columns of rec1 were
+ equal to rec2. Issue a duplicate key error if needed. */
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
+ if (!null_eq && table && dict_index_is_unique(index)) {
+ /* Report erroneous row using new version of table. */
+ innobase_rec_to_mysql(table, rec1, index, offsets1);
+ return(0);
+ }
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
+ /* Else, keep comparing so that we have the full internal
+ order. */
+ for (; n < dict_index_get_n_fields(index); n++) {
+ int cmp = cmp_rec_rec_simple_field(
+ rec1, rec2, offsets1, offsets2, index, n);
- if (rec1_byte < rec2_byte) {
- return(-1);
- } else if (rec1_byte > rec2_byte) {
- return(1);
- }
+ if (cmp) {
+ return(cmp);
}
-next_field:
- continue;
+
+ /* If the fields are internally equal, they must both
+ be NULL or non-NULL. */
+ ut_ad(rec_offs_nth_sql_null(offsets1, n)
+ == rec_offs_nth_sql_null(offsets2, n));
}
- /* If we ran out of fields, rec1 was equal to rec2. */
+ /* This should never be reached. Internally, an index must
+ never contain duplicate entries. */
+ ut_ad(0);
return(0);
}
@@ -1327,6 +1358,7 @@ cmp_debug_dtuple_rec_with_match(
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n_cmp, /*!< in: number of fields to compare */
ulint* matched_fields) /*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
@@ -1339,14 +1371,16 @@ cmp_debug_dtuple_rec_with_match(
field data */
ulint rec_f_len; /* length of current field in rec */
const byte* rec_f_data; /* pointer to the current rec field */
- int ret = 3333; /* return value */
+ int ret; /* return value */
ulint cur_field; /* current field number */
ut_ad(dtuple && rec && matched_fields);
ut_ad(dtuple_check_typed(dtuple));
ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple));
+ ut_ad(n_cmp > 0);
+ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
+ ut_ad(*matched_fields <= n_cmp);
ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
cur_field = *matched_fields;
@@ -1372,7 +1406,7 @@ cmp_debug_dtuple_rec_with_match(
/* Match fields in a loop; stop if we run out of fields in dtuple */
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+ while (cur_field < n_cmp) {
ulint mtype;
ulint prtype;
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 5a864f122a3..3a5d2f579c3 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +29,7 @@ Created 5/30/1994 Heikki Tuuri
#include "rem0rec.ic"
#endif
+#include "page0page.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "fts0fts.h"
@@ -162,13 +163,12 @@ UNIV_INTERN
ulint
rec_get_n_extern_new(
/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n) /*!< in: number of columns to scan */
+ const rec_t* rec, /*!< in: compact physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan */
{
const byte* nulls;
const byte* lens;
- dict_field_t* field;
ulint null_mask;
ulint n_extern;
ulint i;
@@ -189,10 +189,13 @@ rec_get_n_extern_new(
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -210,8 +213,6 @@ rec_get_n_extern_new(
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -240,16 +241,15 @@ rec_get_n_extern_new(
Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT. This is a special case of
rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_init_offsets_comp_ordinary(
/*===========================*/
const rec_t* rec, /*!< in: physical record in
ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
+ bool temp, /*!< in: whether to use the
+ format for temporary files in
+ index creation */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
@@ -257,28 +257,40 @@ rec_init_offsets_comp_ordinary(
ulint i = 0;
ulint offs = 0;
ulint any_ext = 0;
- const byte* nulls = rec - (extra + 1);
- const byte* lens = nulls
- - UT_BITS_IN_BYTES(index->n_nullable);
- dict_field_t* field;
+ ulint n_null = index->n_nullable;
+ const byte* nulls = temp
+ ? rec - 1
+ : rec - (1 + REC_N_NEW_EXTRA_BYTES);
+ const byte* lens = nulls - UT_BITS_IN_BYTES(n_null);
ulint null_mask = 1;
#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because it can hold
- that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
- will fail in that case, because it invokes rec_get_status(). */
+ /* We cannot invoke rec_offs_make_valid() here if temp=true.
+ Similarly, rec_offs_validate() will fail in that case, because
+ it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
+ ut_ad(temp || dict_table_is_comp(index->table));
+
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
+ ut_ad(n_null--);
if (UNIV_UNLIKELY(!(byte) null_mask)) {
nulls--;
@@ -297,10 +309,9 @@ rec_init_offsets_comp_ordinary(
null_mask <<= 1;
}
- if (UNIV_UNLIKELY(!field->fixed_len)) {
+ if (!field->fixed_len
+ || (temp && !dict_col_get_fixed_size(col, temp))) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -394,9 +405,8 @@ rec_init_offsets(
= dict_index_get_n_unique_in_tree(index);
break;
case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(rec,
- REC_N_NEW_EXTRA_BYTES,
- index, offsets);
+ rec_init_offsets_comp_ordinary(
+ rec, false, index, offsets);
return;
}
@@ -774,34 +784,45 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
-UNIV_INTERN
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(1,2)))
ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
+rec_get_converted_size_comp_prefix_low(
+/*===================================*/
const dict_index_t* index, /*!< in: record descriptor;
dict_table_is_comp() is
assumed to hold, even if
it does not */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
+ ulint* extra, /*!< out: extra size */
+ bool temp) /*!< in: whether this is a
+ temporary file record */
{
ulint extra_size;
ulint data_size;
ulint i;
- ut_ad(index);
- ut_ad(fields);
+ ulint n_null = index->n_nullable;
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
+ ut_ad(!temp || extra);
- extra_size = REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(index->n_nullable);
+ extra_size = temp
+ ? UT_BITS_IN_BYTES(n_null)
+ : REC_N_NEW_EXTRA_BYTES
+ + UT_BITS_IN_BYTES(n_null);
data_size = 0;
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
const dict_field_t* field;
ulint len;
+ ulint fixed_len;
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
@@ -810,6 +831,8 @@ rec_get_converted_size_comp_prefix(
ut_ad(dict_col_type_assert_equal(col,
dfield_get_type(&fields[i])));
+ /* All NULLable fields must be included in the n_null count. */
+ ut_ad((col->prtype & DATA_NOT_NULL) || n_null--);
if (dfield_is_null(&fields[i])) {
/* No length is stored for NULL fields. */
@@ -820,6 +843,11 @@ rec_get_converted_size_comp_prefix(
ut_ad(len <= col->len || col->mtype == DATA_BLOB
|| (col->len == 0 && col->mtype == DATA_VARCHAR));
+ fixed_len = field->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -827,11 +855,20 @@ rec_get_converted_size_comp_prefix(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
- if (field->fixed_len) {
- ut_ad(len == field->fixed_len);
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
+ || fixed_len == field->prefix_len);
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
@@ -848,7 +885,7 @@ rec_get_converted_size_comp_prefix(
data_size += len;
}
- if (UNIV_LIKELY_NULL(extra)) {
+ if (extra) {
*extra = extra_size;
}
@@ -856,6 +893,23 @@ rec_get_converted_size_comp_prefix(
}
/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+{
+ ut_ad(dict_table_is_comp(index->table));
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, false));
+}
+
+/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
UNIV_INTERN
@@ -872,8 +926,6 @@ rec_get_converted_size_comp(
ulint* extra) /*!< out: extra size */
{
ulint size;
- ut_ad(index);
- ut_ad(fields);
ut_ad(n_fields > 0);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
@@ -899,8 +951,8 @@ rec_get_converted_size_comp(
return(ULINT_UNDEFINED);
}
- return(size + rec_get_converted_size_comp_prefix(index, fields,
- n_fields, extra));
+ return(size + rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, false));
}
/***********************************************************//**
@@ -1077,19 +1129,18 @@ rec_convert_dtuple_to_rec_old(
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields)/*!< in: number of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint status, /*!< in: status bits of the record */
+ bool temp) /*!< in: whether to use the
+ format for temporary files in
+ index creation */
{
const dfield_t* field;
const dtype_t* type;
@@ -1101,32 +1152,48 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
- ut_ad(extra == 0 || dict_table_is_comp(index->table));
- ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
+ ulint n_null;
+
+ ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
+ if (temp) {
+ ut_ad(status == REC_STATUS_ORDINARY);
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
+ nulls = rec - 1;
+ if (dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only
+ need to adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+ } else {
+ nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+
+ switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+ case REC_STATUS_ORDINARY:
+ ut_ad(n_fields <= dict_index_get_n_fields(index));
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ case REC_STATUS_NODE_PTR:
+ ut_ad(n_fields
+ == dict_index_get_n_unique_in_tree(index) + 1);
+ n_node_ptr_field = n_fields - 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_ad(n_fields == 1);
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ default:
+ ut_error;
+ return;
+ }
}
end = rec;
- nulls = rec - (extra + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+ n_null = index->n_nullable;
+ lens = nulls - UT_BITS_IN_BYTES(n_null);
/* clear the SQL-null flags */
memset(lens + 1, 0, nulls - lens);
@@ -1148,7 +1215,7 @@ rec_convert_dtuple_to_rec_comp(
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */
- ut_ad(index->n_nullable > 0);
+ ut_ad(n_null--);
if (UNIV_UNLIKELY(!(byte) null_mask)) {
nulls--;
@@ -1171,6 +1238,10 @@ rec_convert_dtuple_to_rec_comp(
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(ifield->col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -1178,8 +1249,17 @@ rec_convert_dtuple_to_rec_comp(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
- ut_ad(len == fixed_len);
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(
+ ifield->col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(
+ ifield->col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
@@ -1227,14 +1307,12 @@ rec_convert_dtuple_to_rec_new(
rec_t* rec;
status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
- rec_get_converted_size_comp(index, status,
- dtuple->fields, dtuple->n_fields,
- &extra_size);
+ rec_get_converted_size_comp(
+ index, status, dtuple->fields, dtuple->n_fields, &extra_size);
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
- rec, REC_N_NEW_EXTRA_BYTES, index, status,
- dtuple->fields, dtuple->n_fields);
+ rec, index, dtuple->fields, dtuple->n_fields, status, false);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
@@ -1296,6 +1374,54 @@ rec_convert_dtuple_to_rec(
return(rec);
}
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+{
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, true));
+}
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+{
+ rec_init_offsets_comp_ordinary(rec, true, index, offsets);
+}
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+{
+ rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
+ REC_STATUS_ORDINARY, true);
+}
+
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple. The fields
are copied to the memory heap. */
@@ -1506,6 +1632,7 @@ rec_copy_prefix_to_buf(
return(*buf + (rec - (lens + 1)));
}
+#endif /* UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of an old-style physical record.
@@ -1782,4 +1909,47 @@ rec_print(
}
}
}
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index) /*!< in: clustered index */
+{
+ const page_t* page
+ = page_align(rec);
+ ulint trx_id_col
+ = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ const byte* trx_id;
+ ulint len;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ offsets = rec_get_offsets(rec, index, offsets, trx_id_col + 1, &heap);
+
+ trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len);
+
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ return(trx_read_trx_id(trx_id));
+}
+# endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index 8d4da9f034b..f084fa09c5a 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -95,6 +95,8 @@ row_ext_create(
row_ext_t* ret;
+ ut_ad(n_ext > 0);
+
ret = static_cast<row_ext_t*>(
mem_heap_alloc(heap,
(sizeof *ret) + (n_ext - 1) * sizeof ret->len));
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 50b681361d8..9a6af50e09d 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +23,7 @@ Create Full Text Index with (parallel) merge sort
Created 10/13/2010 Jimmy Yang
*******************************************************/
+#include "dict0dict.h" /* dict_table_stats_lock() */
#include "row0merge.h"
#include "pars0pars.h"
#include "row0ftsort.h"
@@ -47,9 +48,6 @@ Created 10/13/2010 Jimmy Yang
/** Parallel sort degree */
UNIV_INTERN ulong fts_sort_pll_degree = 2;
-/** Parallel sort buffer size */
-UNIV_INTERN ulong srv_sort_buf_size = 1048576;
-
/*********************************************************************//**
Create a temporary "fts sort index" used to merge sort the
tokenized doc string. The index has three "fields":
@@ -124,7 +122,7 @@ row_merge_create_fts_sort_index(
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
/* If Doc ID column is being added by this create
index, then just check the number of rows in the table */
- if (table->stat_n_rows < MAX_DOC_ID_OPT_VAL) {
+ if (dict_table_get_n_rows(table) < MAX_DOC_ID_OPT_VAL) {
*opt_doc_id_size = TRUE;
}
} else {
@@ -173,10 +171,10 @@ ibool
row_fts_psort_info_init(
/*====================*/
trx_t* trx, /*!< in: transaction */
- struct TABLE* table, /*!< in: MySQL table object */
+ row_merge_dup_t* dup, /*!< in,own: descriptor of
+ FTS index being created */
const dict_table_t* new_table,/*!< in: table on which indexes are
created */
- dict_index_t* index, /*!< in: FTS index to be created */
ibool opt_doc_id_size,
/*!< in: whether to use 4 bytes
instead of 8 bytes integer to
@@ -192,7 +190,6 @@ row_fts_psort_info_init(
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ulint block_size;
- os_event_t sort_event;
ibool ret = TRUE;
block_size = 3 * srv_sort_buf_size;
@@ -201,28 +198,28 @@ row_fts_psort_info_init(
fts_sort_pll_degree * sizeof *psort_info));
if (!psort_info) {
- return FALSE;
+ ut_free(dup);
+ return(FALSE);
}
- sort_event = os_event_create(NULL);
-
/* Common Info for all sort threads */
common_info = static_cast<fts_psort_common_t*>(
mem_alloc(sizeof *common_info));
- common_info->table = table;
+ if (!common_info) {
+ ut_free(dup);
+ mem_free(psort_info);
+ return(FALSE);
+ }
+
+ common_info->dup = dup;
common_info->new_table = (dict_table_t*) new_table;
common_info->trx = trx;
- common_info->sort_index = index;
common_info->all_info = psort_info;
- common_info->sort_event = sort_event;
+ common_info->sort_event = os_event_create();
+ common_info->merge_event = os_event_create();
common_info->opt_doc_id_size = opt_doc_id_size;
- if (!common_info) {
- mem_free(psort_info);
- return FALSE;
- }
-
/* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for
each parallel sort thread. Each "sort bucket" holds records for
a particular "FTS index partition" */
@@ -242,9 +239,12 @@ row_fts_psort_info_init(
}
psort_info[j].merge_buf[i] = row_merge_buf_create(
- index);
+ dup->index);
- row_merge_file_create(psort_info[j].merge_file[i]);
+ if (row_merge_file_create(psort_info[j].merge_file[i])
+ < 0) {
+ goto func_exit;
+ }
/* Need to align memory for O_DIRECT write */
psort_info[j].block_alloc[i] =
@@ -314,6 +314,9 @@ row_fts_psort_info_destroy(
}
}
+ os_event_free(merge_info[0].psort_common->sort_event);
+ os_event_free(merge_info[0].psort_common->merge_event);
+ ut_free(merge_info[0].psort_common->dup);
mem_free(merge_info[0].psort_common);
mem_free(psort_info);
}
@@ -433,12 +436,11 @@ row_merge_fts_doc_tokenize(
ut_a(t_ctx->buf_used < FTS_NUM_AUX_INDEX);
idx = t_ctx->buf_used;
- buf->tuples[buf->n_tuples + n_tuple[idx]] = field =
- static_cast<dfield_t*>(mem_heap_alloc(
- buf->heap,
- FTS_NUM_FIELDS_SORT * sizeof *field));
+ mtuple_t* mtuple = &buf->tuples[buf->n_tuples + n_tuple[idx]];
- ut_a(field);
+ field = mtuple->fields = static_cast<dfield_t*>(
+ mem_heap_alloc(buf->heap,
+ FTS_NUM_FIELDS_SORT * sizeof *field));
/* The first field is the tokenized word */
dfield_set_data(field, t_str.f_str, t_str.f_len);
@@ -522,6 +524,10 @@ row_merge_fts_doc_tokenize(
/* Update the data length and the number of new word tuples
added in this round of tokenization */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
+ /* The computation of total_size below assumes that no
+ delete-mark flags will be stored and that all fields
+ are NOT NULL and fixed-length. */
+
sort_buf[i]->total_size += data_size[i];
sort_buf[i]->n_tuples += n_tuple[i];
@@ -560,7 +566,7 @@ fts_parallel_tokenization(
ulint mycount[FTS_NUM_AUX_INDEX];
ib_uint64_t total_rec = 0;
ulint num_doc_processed = 0;
- doc_id_t last_doc_id;
+ doc_id_t last_doc_id = 0;
ulint zip_size;
mem_heap_t* blob_heap = NULL;
fts_doc_t doc;
@@ -581,10 +587,10 @@ fts_parallel_tokenization(
memset(mycount, 0, FTS_NUM_AUX_INDEX * sizeof(int));
doc.charset = fts_index_get_charset(
- psort_info->psort_common->sort_index);
+ psort_info->psort_common->dup->index);
idx_field = dict_index_get_nth_field(
- psort_info->psort_common->sort_index, 0);
+ psort_info->psort_common->dup->index, 0);
word_dtype.prtype = idx_field->col->prtype;
word_dtype.mbminmaxlen = idx_field->col->mbminmaxlen;
word_dtype.mtype = (strcmp(doc.charset->name, "latin1_swedish_ci") == 0)
@@ -742,7 +748,12 @@ loop:
}
if (doc_item) {
- prev_doc_item = doc_item;
+ prev_doc_item = doc_item;
+
+ if (last_doc_id != doc_item->doc_id) {
+ t_ctx.init_pos = 0;
+ }
+
retried = 0;
} else if (psort_info->state == FTS_PARENT_COMPLETE) {
retried++;
@@ -751,16 +762,51 @@ loop:
goto loop;
exit:
+ /* Do a final sort of the last (or latest) batch of records
+ in block memory. Flush them to temp file if records cannot
+	be held in one block of memory */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
if (t_ctx.rows_added[i]) {
row_merge_buf_sort(buf[i], NULL);
row_merge_buf_write(
- buf[i], (const merge_file_t*) merge_file[i],
- block[i]);
- row_merge_write(merge_file[i]->fd,
- merge_file[i]->offset++, block[i]);
+ buf[i], merge_file[i], block[i]);
+
+ /* Write to temp file, only if records have
+ been flushed to temp file before (offset > 0):
+ The pseudo code for sort is following:
+
+ while (there are rows) {
+ tokenize rows, put result in block[]
+ if (block[] runs out) {
+ sort rows;
+ write to temp file with
+ row_merge_write();
+ offset++;
+ }
+ }
+
+ # write out the last batch
+ if (offset > 0) {
+ row_merge_write();
+ offset++;
+ } else {
+ # no need to write anything
+			offset stays at 0
+ }
+
+			so if merge_file[i]->offset is 0 when we come
+			here for the last batch, it means the rows have
+			never been flushed to the temp file; they can all
+			be held in memory */
+ if (merge_file[i]->offset != 0) {
+ row_merge_write(merge_file[i]->fd,
+ merge_file[i]->offset++,
+ block[i]);
+
+ UNIV_MEM_INVALID(block[i][0],
+ srv_sort_buf_size);
+ }
- UNIV_MEM_INVALID(block[i][0], srv_sort_buf_size);
buf[i] = row_merge_buf_empty(buf[i]);
t_ctx.rows_added[i] = 0;
}
@@ -776,16 +822,19 @@ exit:
continue;
}
- tmpfd[i] = innobase_mysql_tmpfile();
+ tmpfd[i] = row_merge_file_create_low();
+ if (tmpfd[i] < 0) {
+ goto func_exit;
+ }
+
row_merge_sort(psort_info->psort_common->trx,
- psort_info->psort_common->sort_index,
- merge_file[i],
- (row_merge_block_t*) block[i], &tmpfd[i],
- psort_info->psort_common->table);
+ psort_info->psort_common->dup,
+ merge_file[i], block[i], &tmpfd[i]);
total_rec += merge_file[i]->n_rec;
close(tmpfd[i]);
}
+func_exit:
if (fts_enable_diag_print) {
DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: complete merge sort\n");
}
@@ -794,8 +843,14 @@ exit:
psort_info->child_status = FTS_CHILD_COMPLETE;
os_event_set(psort_info->psort_common->sort_event);
+ psort_info->child_status = FTS_CHILD_EXITING;
+
+#ifdef __WIN__
+ CloseHandle(psort_info->thread_hdl);
+#endif /*__WIN__ */
os_thread_exit(NULL);
+
OS_THREAD_DUMMY_RETURN;
}
@@ -812,8 +867,9 @@ row_fts_start_psort(
for (i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].psort_id = i;
- os_thread_create(fts_parallel_tokenization,
- (void*) &psort_info[i], &thd_id);
+ psort_info[i].thread_hdl = os_thread_create(
+ fts_parallel_tokenization,
+ (void*) &psort_info[i], &thd_id);
}
}
@@ -833,14 +889,20 @@ fts_parallel_merge(
id = psort_info->psort_id;
- row_fts_merge_insert(psort_info->psort_common->sort_index,
+ row_fts_merge_insert(psort_info->psort_common->dup->index,
psort_info->psort_common->new_table,
psort_info->psort_common->all_info, id);
psort_info->child_status = FTS_CHILD_COMPLETE;
- os_event_set(psort_info->psort_common->sort_event);
+ os_event_set(psort_info->psort_common->merge_event);
+ psort_info->child_status = FTS_CHILD_EXITING;
+
+#ifdef __WIN__
+ CloseHandle(psort_info->thread_hdl);
+#endif /*__WIN__ */
os_thread_exit(NULL);
+
OS_THREAD_DUMMY_RETURN;
}
@@ -860,16 +922,16 @@ row_fts_start_parallel_merge(
merge_info[i].psort_id = i;
merge_info[i].child_status = 0;
- os_thread_create(fts_parallel_merge,
- (void*) &merge_info[i], &thd_id);
+ merge_info[i].thread_hdl = os_thread_create(
+ fts_parallel_merge, (void*) &merge_info[i], &thd_id);
}
}
/********************************************************************//**
Insert processed FTS data to auxillary index tables.
@return DB_SUCCESS if insertion runs fine */
-UNIV_INTERN
-ulint
+static __attribute__((nonnull))
+dberr_t
row_merge_write_fts_word(
/*=====================*/
trx_t* trx, /*!< in: transaction */
@@ -880,15 +942,15 @@ row_merge_write_fts_word(
CHARSET_INFO* charset) /*!< in: charset */
{
ulint selected;
- ulint ret = DB_SUCCESS;
+ dberr_t ret = DB_SUCCESS;
selected = fts_select_index(
charset, word->text.f_str, word->text.f_len);
fts_table->suffix = fts_get_suffix(selected);
/* Pop out each fts_node in word->nodes write them to auxiliary table */
- while(ib_vector_size(word->nodes) > 0) {
- ulint error;
+ while (ib_vector_size(word->nodes) > 0) {
+ dberr_t error;
fts_node_t* fts_node;
fts_node = static_cast<fts_node_t*>(ib_vector_pop(word->nodes));
@@ -900,8 +962,8 @@ row_merge_write_fts_word(
if (error != DB_SUCCESS) {
fprintf(stderr, "InnoDB: failed to write"
" word %s to FTS auxiliary index"
- " table, error (%lu) \n",
- word->text.f_str, error);
+ " table, error (%s) \n",
+ word->text.f_str, ut_strerr(error));
ret = error;
}
@@ -1064,7 +1126,6 @@ row_fts_sel_tree_propagate(
int child_left;
int child_right;
int selected;
- ibool null_eq = FALSE;
/* Find which parent this value will be propagated to */
parent = (propogated - 1) / 2;
@@ -1083,10 +1144,10 @@ row_fts_sel_tree_propagate(
} else if (child_right == -1
|| mrec[child_right] == NULL) {
selected = child_left;
- } else if (row_merge_cmp(mrec[child_left], mrec[child_right],
- offsets[child_left],
- offsets[child_right],
- index, &null_eq) < 0) {
+ } else if (cmp_rec_rec_simple(mrec[child_left], mrec[child_right],
+ offsets[child_left],
+ offsets[child_right],
+ index, NULL) < 0) {
selected = child_left;
} else {
selected = child_right;
@@ -1143,8 +1204,6 @@ row_fts_build_sel_tree_level(
num_item = (1 << level);
for (i = 0; i < num_item; i++) {
- ibool null_eq = FALSE;
-
child_left = sel_tree[(start + i) * 2 + 1];
child_right = sel_tree[(start + i) * 2 + 2];
@@ -1174,14 +1233,12 @@ row_fts_build_sel_tree_level(
}
/* Select the smaller one to set parent pointer */
- if (row_merge_cmp(mrec[child_left], mrec[child_right],
- offsets[child_left],
- offsets[child_right],
- index, &null_eq) < 0) {
- sel_tree[start + i] = child_left;
- } else {
- sel_tree[start + i] = child_right;
- }
+ int cmp = cmp_rec_rec_simple(
+ mrec[child_left], mrec[child_right],
+ offsets[child_left], offsets[child_right],
+ index, NULL);
+
+ sel_tree[start + i] = cmp < 0 ? child_left : child_right;
}
}
@@ -1231,7 +1288,7 @@ Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
row_fts_merge_insert(
/*=================*/
dict_index_t* index, /*!< in: index */
@@ -1243,7 +1300,7 @@ row_fts_merge_insert(
const byte** b;
mem_heap_t* tuple_heap;
mem_heap_t* heap;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint* foffs;
ulint** offsets;
fts_tokenizer_word_t new_word;
@@ -1317,7 +1374,7 @@ row_fts_merge_insert(
count_diag += (int) psort_info[i].merge_file[id]->n_rec;
}
- if (fts_enable_diag_print) {
+ if (fts_enable_diag_print) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB_FTS: to inserted %lu records\n",
(ulong) count_diag);
@@ -1349,8 +1406,13 @@ row_fts_merge_insert(
/* No Rows to read */
mrec[i] = b[i] = NULL;
} else {
- if (!row_merge_read(fd[i], foffs[i],
- (row_merge_block_t*) block[i])) {
+ /* Read from temp file only if it has been
+ written to. Otherwise, block memory holds
+ all the sorted records */
+ if (psort_info[i].merge_file[id]->offset > 0
+ && (!row_merge_read(
+ fd[i], foffs[i],
+ (row_merge_block_t*) block[i]))) {
error = DB_CORRUPTION;
goto exit;
}
@@ -1386,14 +1448,14 @@ row_fts_merge_insert(
}
for (i = min_rec + 1; i < fts_sort_pll_degree; i++) {
- ibool null_eq = FALSE;
if (!mrec[i]) {
continue;
}
- if (row_merge_cmp(mrec[i], mrec[min_rec],
- offsets[i], offsets[min_rec],
- index, &null_eq) < 0) {
+ if (cmp_rec_rec_simple(
+ mrec[i], mrec[min_rec],
+ offsets[i], offsets[min_rec],
+ index, NULL) < 0) {
min_rec = i;
}
}
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
new file mode 100644
index 00000000000..f5eb31191a5
--- /dev/null
+++ b/storage/innobase/row/row0import.cc
@@ -0,0 +1,3806 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0import.cc
+Import a tablespace to a running instance.
+
+Created 2012-02-08 by Sunny Bains.
+*******************************************************/
+
+#include "row0import.h"
+
+#ifdef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#include "btr0pcur.h"
+#include "que0que.h"
+#include "dict0boot.h"
+#include "ibuf0ibuf.h"
+#include "pars0pars.h"
+#include "row0upd.h"
+#include "row0sel.h"
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "row0quiesce.h"
+
+#include <vector>
+
+/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
+reads to fail. If you set the buffer size to be greater than a multiple of the
+file size then it will assert. TODO: Fix this limitation of the IO functions.
+@param n - page size of the tablespace.
+@retval number of pages */
+#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
+
+/** For gathering stats on records during phase I */
+struct row_stats_t {
+ ulint m_n_deleted; /*!< Number of deleted records
+ found in the index */
+
+ ulint m_n_purged; /*!< Number of records purged
+					optimistically */
+
+ ulint m_n_rows; /*!< Number of rows */
+
+ ulint m_n_purge_failed; /*!< Number of deleted rows
+ that could not be purged */
+};
+
+/** Index information required by IMPORT. */
+struct row_index_t {
+ index_id_t m_id; /*!< Index id of the table
+ in the exporting server */
+ byte* m_name; /*!< Index name */
+
+ ulint m_space; /*!< Space where it is placed */
+
+ ulint m_page_no; /*!< Root page number */
+
+ ulint m_type; /*!< Index type */
+
+ ulint m_trx_id_offset; /*!< Relevant only for clustered
+ indexes, offset of transaction
+ id system column */
+
+ ulint m_n_user_defined_cols; /*!< User defined columns */
+
+ ulint m_n_uniq; /*!< Number of columns that can
+ uniquely identify the row */
+
+ ulint m_n_nullable; /*!< Number of nullable
+ columns */
+
+ ulint m_n_fields; /*!< Total number of fields */
+
+ dict_field_t* m_fields; /*!< Index fields */
+
+ const dict_index_t*
+ m_srv_index; /*!< Index instance in the
+ importing server */
+
+ row_stats_t m_stats; /*!< Statistics gathered during
+ the import phase */
+
+};
+
+/** Meta data required by IMPORT. */
+struct row_import {
+ row_import() UNIV_NOTHROW
+ :
+ m_table(),
+ m_version(),
+ m_hostname(),
+ m_table_name(),
+ m_autoinc(),
+ m_page_size(),
+ m_flags(),
+ m_n_cols(),
+ m_cols(),
+ m_col_names(),
+ m_n_indexes(),
+ m_indexes(),
+ m_missing(true) { }
+
+ ~row_import() UNIV_NOTHROW;
+
+ /**
+	Find the index entry in the indexes array.
+ @param name - index name
+ @return instance if found else 0. */
+ row_index_t* get_index(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Get the number of rows in the index.
+ @param name - index name
+ @return number of rows (doesn't include delete marked rows). */
+ ulint get_n_rows(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Find the ordinal value of the column name in the cfg table columns.
+ @param name - of column to look for.
+ @return ULINT_UNDEFINED if not found. */
+ ulint find_col(const char* name) const UNIV_NOTHROW;
+
+ /**
+	Find the index field entry in the cfg indexes fields.
+ @name - of the index to look for
+ @return instance if found else 0. */
+ const dict_field_t* find_field(
+ const row_index_t* cfg_index,
+ const char* name) const UNIV_NOTHROW;
+
+ /**
+ Get the number of rows for which purge failed during the convert phase.
+ @param name - index name
+ @return number of rows for which purge failed. */
+ ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Check if the index is clean. ie. no delete-marked records
+ @param name - index name
+ @return true if index needs to be purged. */
+ bool requires_purge(const char* name) const UNIV_NOTHROW
+ {
+ return(get_n_purge_failed(name) > 0);
+ }
+
+ /**
+ Set the index root <space, pageno> using the index name */
+ void set_root_by_name() UNIV_NOTHROW;
+
+ /**
+ Set the index root <space, pageno> using a heuristic
+ @return DB_SUCCESS or error code */
+ dberr_t set_root_by_heuristic() UNIV_NOTHROW;
+
+ /** Check if the index schema that was read from the .cfg file
+ matches the in memory index definition.
+ Note: It will update row_import_t::m_srv_index to map the meta-data
+ read from the .cfg file to the server index instance.
+ @return DB_SUCCESS or error code. */
+ dberr_t match_index_columns(
+ THD* thd,
+ const dict_index_t* index) UNIV_NOTHROW;
+
+ /**
+ Check if the table schema that was read from the .cfg file matches the
+ in memory table definition.
+ @param thd - MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_table_columns(
+ THD* thd) UNIV_NOTHROW;
+
+ /**
+ Check if the table (and index) schema that was read from the .cfg file
+ matches the in memory table definition.
+ @param thd - MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_schema(
+ THD* thd) UNIV_NOTHROW;
+
+ dict_table_t* m_table; /*!< Table instance */
+
+ ulint m_version; /*!< Version of config file */
+
+ byte* m_hostname; /*!< Hostname where the
+ tablespace was exported */
+ byte* m_table_name; /*!< Exporting instance table
+ name */
+
+ ib_uint64_t m_autoinc; /*!< Next autoinc value */
+
+ ulint m_page_size; /*!< Tablespace page size */
+
+ ulint m_flags; /*!< Table flags */
+
+ ulint m_n_cols; /*!< Number of columns in the
+ meta-data file */
+
+ dict_col_t* m_cols; /*!< Column data */
+
+ byte** m_col_names; /*!< Column names, we store the
+					column names separately because
+ there is no field to store the
+ value in dict_col_t */
+
+ ulint m_n_indexes; /*!< Number of indexes,
+ including clustered index */
+
+ row_index_t* m_indexes; /*!< Index meta data */
+
+ bool m_missing; /*!< true if a .cfg file was
+ found and was readable */
+};
+
+/** Use the page cursor to iterate over records in a block. */
+class RecIterator {
+public:
+ /**
+ Default constructor */
+ RecIterator() UNIV_NOTHROW
+ {
+ memset(&m_cur, 0x0, sizeof(m_cur));
+ }
+
+ /**
+ Position the cursor on the first user record. */
+ void open(buf_block_t* block) UNIV_NOTHROW
+ {
+ page_cur_set_before_first(block, &m_cur);
+
+ if (!end()) {
+ next();
+ }
+ }
+
+ /**
+ Move to the next record. */
+ void next() UNIV_NOTHROW
+ {
+ page_cur_move_to_next(&m_cur);
+ }
+
+ /**
+ @return the current record */
+ rec_t* current() UNIV_NOTHROW
+ {
+ ut_ad(!end());
+ return(page_cur_get_rec(&m_cur));
+ }
+
+ /**
+ @return true if cursor is at the end */
+ bool end() UNIV_NOTHROW
+ {
+ return(page_cur_is_after_last(&m_cur) == TRUE);
+ }
+
+ /** Remove the current record
+ @return true on success */
+ bool remove(
+ const dict_index_t* index,
+ page_zip_des_t* page_zip,
+ ulint* offsets) UNIV_NOTHROW
+ {
+ /* We can't end up with an empty page unless it is root. */
+ if (page_get_n_recs(m_cur.block->frame) <= 1) {
+ return(false);
+ }
+
+ return(page_delete_rec(index, &m_cur, page_zip, offsets));
+ }
+
+private:
+ page_cur_t m_cur;
+};
+
+/** Class that purges delete marked records from indexes, both secondary
+and clustered. It does a pessimistic delete. This should only be done if we
+couldn't purge the delete marked records during Phase I. */
+class IndexPurge {
+public:
+ /** Constructor
+ @param trx - the user transaction covering the import tablespace
+ @param index - to be imported
+ @param space_id - space id of the tablespace */
+ IndexPurge(
+ trx_t* trx,
+ dict_index_t* index) UNIV_NOTHROW
+ :
+ m_trx(trx),
+ m_index(index),
+ m_n_rows(0)
+ {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Phase II - Purge records from index %s",
+ index->name);
+ }
+
+	/** Destructor */
+ ~IndexPurge() UNIV_NOTHROW { }
+
+ /** Purge delete marked records.
+ @return DB_SUCCESS or error code. */
+ dberr_t garbage_collect() UNIV_NOTHROW;
+
+ /** The number of records that are not delete marked.
+ @return total records in the index after purge */
+ ulint get_n_rows() const UNIV_NOTHROW
+ {
+ return(m_n_rows);
+ }
+
+private:
+ /**
+ Begin import, position the cursor on the first record. */
+ void open() UNIV_NOTHROW;
+
+ /**
+	Close the persistent cursor and commit the mini-transaction. */
+ void close() UNIV_NOTHROW;
+
+ /**
+ Position the cursor on the next record.
+ @return DB_SUCCESS or error code */
+ dberr_t next() UNIV_NOTHROW;
+
+ /**
+ Store the persistent cursor position and reopen the
+ B-tree cursor in BTR_MODIFY_TREE mode, because the
+ tree structure may be changed during a pessimistic delete. */
+ void purge_pessimistic_delete() UNIV_NOTHROW;
+
+ /**
+ Purge delete-marked records.
+ @param offsets - current row offsets. */
+ void purge() UNIV_NOTHROW;
+
+protected:
+ // Disable copying
+ IndexPurge();
+ IndexPurge(const IndexPurge&);
+ IndexPurge &operator=(const IndexPurge&);
+
+private:
+ trx_t* m_trx; /*!< User transaction */
+ mtr_t m_mtr; /*!< Mini-transaction */
+ btr_pcur_t m_pcur; /*!< Persistent cursor */
+ dict_index_t* m_index; /*!< Index to be processed */
+ ulint m_n_rows; /*!< Records in index */
+};
+
+/** Functor that is called for each physical page that is read from the
+tablespace file. */
+class AbstractCallback : public PageCallback {
+public:
+ /** Constructor
+ @param trx - covering transaction */
+ AbstractCallback(trx_t* trx)
+ :
+ m_trx(trx),
+ m_space(ULINT_UNDEFINED),
+ m_xdes(),
+ m_xdes_page_no(ULINT_UNDEFINED),
+ m_space_flags(ULINT_UNDEFINED),
+ m_table_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
+
+ /**
+ Free any extent descriptor instance */
+ virtual ~AbstractCallback()
+ {
+ delete [] m_xdes;
+ }
+
+ /** Determine the page size to use for traversing the tablespace
+ @param file_size - size of the tablespace file in bytes
+ @param block - contents of the first page in the tablespace file.
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW;
+
+ /** @return true if compressed table. */
+ bool is_compressed_table() const UNIV_NOTHROW
+ {
+ return(get_zip_size() > 0);
+ }
+
+protected:
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ {
+ if (is_compressed_table()) {
+ return(block->page.zip.data);
+ }
+
+ return(buf_block_get_frame(block));
+ }
+
+ /** Check for session interrupt. If required we could
+ even flush to disk here every N pages.
+ @retval DB_SUCCESS or error code */
+ dberr_t periodic_check() UNIV_NOTHROW
+ {
+ if (trx_is_interrupted(m_trx)) {
+ return(DB_INTERRUPTED);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ /**
+ Get the physical offset of the extent descriptor within the page.
+ @param page_no - page number of the extent descriptor
+ @param page - contents of the page containing the extent descriptor.
+ @return the start of the xdes array in a page */
+ const xdes_t* xdes(
+ ulint page_no,
+ const page_t* page) const UNIV_NOTHROW
+ {
+ ulint offset;
+
+ offset = xdes_calc_descriptor_index(get_zip_size(), page_no);
+
+ return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
+ }
+
+ /**
+ Set the current page directory (xdes). If the extent descriptor is
+ marked as free then free the current extent descriptor and set it to
+ 0. This implies that all pages that are covered by this extent
+ descriptor are also freed.
+
+ @param page_no - offset of page within the file
+ @param page - page contents
+ @return DB_SUCCESS or error code. */
+ dberr_t set_current_xdes(
+ ulint page_no,
+ const page_t* page) UNIV_NOTHROW
+ {
+ m_xdes_page_no = page_no;
+
+ delete[] m_xdes;
+
+ m_xdes = 0;
+
+ ulint state;
+ const xdes_t* xdesc = page + XDES_ARR_OFFSET;
+
+ state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES);
+
+ if (state != XDES_FREE) {
+
+ m_xdes = new(std::nothrow) xdes_t[m_page_size];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_13",
+ delete [] m_xdes; m_xdes = 0;);
+
+ if (m_xdes == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memcpy(m_xdes, page, m_page_size);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ /**
+ @return true if it is a root page */
+ bool is_root_page(const page_t* page) const UNIV_NOTHROW
+ {
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+ return(mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
+ && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL);
+ }
+
+ /**
+ Check if the page is marked as free in the extent descriptor.
+ @param page_no - page number to check in the extent descriptor.
+ @return true if the page is marked as free */
+ bool is_free(ulint page_no) const UNIV_NOTHROW
+ {
+ ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no)
+ == m_xdes_page_no);
+
+ if (m_xdes != 0) {
+ const xdes_t* xdesc = xdes(page_no, m_xdes);
+ ulint pos = page_no % FSP_EXTENT_SIZE;
+
+ return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos));
+ }
+
+ /* If the current xdes was free, the page must be free. */
+ return(true);
+ }
+
+protected:
+ /** Covering transaction. */
+ trx_t* m_trx;
+
+ /** Space id of the file being iterated over. */
+ ulint m_space;
+
+ /** Minimum page number for which the free list has not been
+ initialized: the pages >= this limit are, by definition, free;
+ note that in a single-table tablespace where size < 64 pages,
+ this number is 64, i.e., we have initialized the space about
+	the first extent, but have not physically allocated those pages
+ to the file. @see FSP_LIMIT. */
+ ulint m_free_limit;
+
+ /** Current size of the space in pages */
+ ulint m_size;
+
+ /** Current extent descriptor page */
+ xdes_t* m_xdes;
+
+ /** Physical page offset in the file of the extent descriptor */
+ ulint m_xdes_page_no;
+
+ /** Flags value read from the header page */
+ ulint m_space_flags;
+
+ /** Derived from m_space_flags and row format type, the row format
+ type is determined from the page header. */
+ ulint m_table_flags;
+};
+
+/** Determine the page size to use for traversing the tablespace
+@param file_size - size of the tablespace file in bytes
+@param block - contents of the first page in the tablespace file.
+@retval DB_SUCCESS or error code. */
+dberr_t
+AbstractCallback::init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW
+{
+ const page_t* page = block->frame;
+
+ m_space_flags = fsp_header_get_flags(page);
+
+ /* Since we don't know whether it is a compressed table
+ or not, the data is always read into the block->frame. */
+
+ dberr_t err = set_zip_size(block->frame);
+
+ if (err != DB_SUCCESS) {
+ return(DB_CORRUPTION);
+ }
+
+ /* Set the page size used to traverse the tablespace. */
+
+ m_page_size = (is_compressed_table())
+ ? get_zip_size() : fsp_flags_get_page_size(m_space_flags);
+
+ if (m_page_size == 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Page size is 0");
+ return(DB_CORRUPTION);
+ } else if (!is_compressed_table() && m_page_size != UNIV_PAGE_SIZE) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page size %lu of ibd file is not the same "
+ "as the server page size %lu",
+ m_page_size, UNIV_PAGE_SIZE);
+
+ return(DB_CORRUPTION);
+
+ } else if ((file_size % m_page_size)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File size " UINT64PF " is not a multiple "
+ "of the page size %lu",
+ (ib_uint64_t) file_size, (ulong) m_page_size);
+
+ return(DB_CORRUPTION);
+ }
+
+ ut_a(m_space == ULINT_UNDEFINED);
+
+ m_size = mach_read_from_4(page + FSP_SIZE);
+ m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
+ m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
+
+ if ((err = set_current_xdes(0, page)) != DB_SUCCESS) {
+ return(err);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Try and determine the index root pages by checking if the next/prev
+pointers are both FIL_NULL. We need to ensure that we skip deleted pages. */
+struct FetchIndexRootPages : public AbstractCallback {
+
+ /** Index information gathered from the .ibd file. */
+ struct Index {
+
+ Index(index_id_t id, ulint page_no)
+ :
+ m_id(id),
+ m_page_no(page_no) { }
+
+ index_id_t m_id; /*!< Index id */
+ ulint m_page_no; /*!< Root page number */
+ };
+
+ typedef std::vector<Index> Indexes;
+
+ /** Constructor
+ @param trx - covering (user) transaction
+ @param table - table definition in server .*/
+ FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
+ :
+ AbstractCallback(trx),
+ m_table(table) UNIV_NOTHROW { }
+
+ /** Destructor */
+ virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
+
+ /**
+ @retval the space id of the tablespace being iterated over */
+ virtual ulint get_space_id() const UNIV_NOTHROW
+ {
+ return(m_space);
+ }
+
+ /**
+ Check if the .ibd file row format is the same as the table's.
+ @param ibd_table_flags - determined from space and page.
+ @return DB_SUCCESS or error code. */
+ dberr_t check_row_format(ulint ibd_table_flags) UNIV_NOTHROW
+ {
+ dberr_t err;
+ rec_format_t ibd_rec_format;
+ rec_format_t table_rec_format;
+
+ if (!dict_tf_is_valid(ibd_table_flags)) {
+
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ ".ibd file has invlad table flags: %lx",
+ ibd_table_flags);
+
+ return(DB_CORRUPTION);
+ }
+
+ ibd_rec_format = dict_tf_get_rec_format(ibd_table_flags);
+ table_rec_format = dict_tf_get_rec_format(m_table->flags);
+
+ if (table_rec_format != ibd_rec_format) {
+
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Table has %s row format, .ibd "
+ "file has %s row format.",
+ dict_tf_to_row_format_string(m_table->flags),
+ dict_tf_to_row_format_string(ibd_table_flags));
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ return(err);
+ }
+
+ /**
+ Called for each block as it is read from the file.
+ @param offset - physical offset in the file
+ @param block - block to convert, it is not from the buffer pool.
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator() (
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW;
+
+ /** Update the import configuration that will be used to import
+ the tablespace. */
+ dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
+
+ /** Table definition in server. */
+ const dict_table_t* m_table;
+
+ /** Index information */
+ Indexes m_indexes;
+};
+
+/**
+Called for each block as it is read from the file. Check index pages to
+determine the exact row format. We can't get that from the tablespace
+header flags alone.
+
+@param offset - physical offset in the file
+@param block - block to convert, it is not from the buffer pool.
+@retval DB_SUCCESS or error code. */
+dberr_t
+FetchIndexRootPages::operator() (
+	os_offset_t	offset,
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	dberr_t		err;
+
+	/* After this check err == DB_SUCCESS; page types that are not
+	handled below therefore fall out with DB_SUCCESS unchanged. */
+	if ((err = periodic_check()) != DB_SUCCESS) {
+		return(err);
+	}
+
+	const page_t*	page = get_frame(block);
+
+	ulint	page_type = fil_page_get_type(page);
+
+	/* Cross-check the page number stored in the page against the
+	physical file offset; a mismatch means the file is corrupt. */
+	if (block->page.offset * m_page_size != offset) {
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Page offset doesn't match file offset: "
+			"page offset: %lu, file offset: %lu",
+			(ulint) block->page.offset,
+			(ulint) (offset / m_page_size));
+
+		err = DB_CORRUPTION;
+	} else if (page_type == FIL_PAGE_TYPE_XDES) {
+		err = set_current_xdes(block->page.offset, page);
+	} else if (page_type == FIL_PAGE_INDEX
+		   && !is_free(block->page.offset)
+		   && is_root_page(page)) {
+
+		index_id_t	id = btr_page_get_index_id(page);
+		ulint		page_no = buf_block_get_page_no(block);
+
+		m_indexes.push_back(Index(id, page_no));
+
+		/* The first root page found is assumed to belong to the
+		clustered index; derive the table flags from it. */
+		if (m_indexes.size() == 1) {
+
+			m_table_flags = dict_sys_tables_type_to_tf(
+				m_space_flags,
+				page_is_comp(page) ? DICT_N_COLS_COMPACT : 0);
+
+			err = check_row_format(m_table_flags);
+		}
+	}
+
+	return(err);
+}
+
+/**
+Update the import configuration that will be used to import the tablespace.
+Allocates cfg->m_indexes (owned by row_import, freed in its destructor)
+and fills in a synthesized name, id, space and root page for each index
+root page that was found during the scan.
+@return error code or DB_SUCCESS */
+dberr_t
+FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
+{
+	Indexes::const_iterator end = m_indexes.end();
+
+	ut_a(cfg->m_table == m_table);
+	cfg->m_page_size = m_page_size;
+	cfg->m_n_indexes = m_indexes.size();
+
+	if (cfg->m_n_indexes == 0) {
+
+		ib_logf(IB_LOG_LEVEL_ERROR, "No B+Tree found in tablespace");
+
+		return(DB_CORRUPTION);
+	}
+
+	cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
+
+	/* Trigger OOM */
+	DBUG_EXECUTE_IF("ib_import_OOM_11",
+			delete [] cfg->m_indexes; cfg->m_indexes = 0;);
+
+	if (cfg->m_indexes == 0) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
+
+	row_index_t*	cfg_index = cfg->m_indexes;
+
+	for (Indexes::const_iterator it = m_indexes.begin();
+	     it != end;
+	     ++it, ++cfg_index) {
+
+		/* Without a .cfg file there are no real index names;
+		synthesize one from the index id. */
+		char	name[BUFSIZ];
+
+		ut_snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
+
+		ulint	len = strlen(name) + 1;
+
+		cfg_index->m_name = new(std::nothrow) byte[len];
+
+		/* Trigger OOM */
+		DBUG_EXECUTE_IF("ib_import_OOM_12",
+				delete [] cfg_index->m_name;
+				cfg_index->m_name = 0;);
+
+		if (cfg_index->m_name == 0) {
+			/* Partially built cfg is cleaned up by the
+			row_import destructor. */
+			return(DB_OUT_OF_MEMORY);
+		}
+
+		memcpy(cfg_index->m_name, name, len);
+
+		cfg_index->m_id = it->m_id;
+
+		cfg_index->m_space = m_space;
+
+		cfg_index->m_page_no = it->m_page_no;
+	}
+
+	return(DB_SUCCESS);
+}
+
+/* Functor that is called for each physical page that is read from the
+tablespace file.
+
+  1. Check each page for corruption.
+
+  2. Update the space id and LSN on every page
+     * For the header page
+       - Validate the flags
+       - Update the LSN
+
+  3. On Btree pages
+     * Set the index id
+     * Update the max trx id
+     * In a cluster index, update the system columns
+     * In a cluster index, update the BLOB ptr, set the space id
+     * Purge delete marked records, but only if they can be easily
+       removed from the page
+     * Keep a counter of number of rows, i.e. non-delete-marked rows
+     * Keep a counter of number of delete marked rows
+     * Keep a counter of number of purge failures
+     * If a page is stamped with an index id that isn't in the .cfg file
+       we assume it is deleted and the page can be ignored.
+
+   4. Set the page state to dirty so that it will be written to disk.
+*/
+class PageConverter : public AbstractCallback {
+public:
+	/** Constructor
+	* @param cfg - config of table being imported.
+	* @param trx - transaction covering the import */
+	PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW;
+
+	/** Destructor; frees the record-offsets heap if one was
+	allocated by rec_get_offsets(). */
+	virtual ~PageConverter() UNIV_NOTHROW
+	{
+		if (m_heap != 0) {
+			mem_heap_free(m_heap);
+		}
+	}
+
+	/**
+	@return the server space id of the tablespace being iterated over
+	(the destination table's space, not the id stored in the file). */
+	virtual ulint get_space_id() const UNIV_NOTHROW
+	{
+		return(m_cfg->m_table->space);
+	}
+
+	/**
+	Called for each block as it is read from the file.
+	@param offset - physical offset in the file
+	@param block - block to convert, it is not from the buffer pool.
+	@retval DB_SUCCESS or error code. */
+	virtual dberr_t operator() (
+		os_offset_t	offset,
+		buf_block_t*	block) UNIV_NOTHROW;
+private:
+
+	/** Status returned by PageConverter::validate() */
+	enum import_page_status_t {
+		IMPORT_PAGE_STATUS_OK,		/*!< Page is OK */
+		IMPORT_PAGE_STATUS_ALL_ZERO,	/*!< Page is all zeros */
+		IMPORT_PAGE_STATUS_CORRUPTED	/*!< Page is corrupted */
+	};
+
+	/**
+	Update the page, set the space id, max trx id and index id.
+	@param block - block read from file
+	@param page_type - type of the page
+	@retval DB_SUCCESS or error code */
+	dberr_t update_page(
+		buf_block_t*	block,
+		ulint&		page_type) UNIV_NOTHROW;
+
+#if defined UNIV_DEBUG
+	/**
+	@return true if the error condition is enabled. */
+	bool trigger_corruption() UNIV_NOTHROW
+	{
+		return(false);
+	}
+	#else
+#define trigger_corruption()	(false)
+#endif /* UNIV_DEBUG */
+
+	/**
+	Update the space, index id, trx id.
+	@param block - block to convert
+	@return DB_SUCCESS or error code */
+	dberr_t	update_index_page(buf_block_t* block) UNIV_NOTHROW;
+
+	/** Update the BLOB references and write UNDO log entries for
+	rows that can't be purged optimistically.
+	@param block - block to update
+	@retval DB_SUCCESS or error code */
+	dberr_t	update_records(buf_block_t* block) UNIV_NOTHROW;
+
+	/**
+	Validate the page, check for corruption.
+	@param offset - physical offset within file.
+	@param page - page read from file.
+	@return one of import_page_status_t: OK, all-zero, or corrupted */
+	import_page_status_t validate(
+		os_offset_t	offset,
+		buf_block_t*	page) UNIV_NOTHROW;
+
+	/**
+	Validate the space flags and update tablespace header page.
+	@param block - block read from file, not from the buffer pool.
+	@retval DB_SUCCESS or error code */
+	dberr_t	update_header(buf_block_t* block) UNIV_NOTHROW;
+
+	/**
+	Adjust the BLOB reference for a single column that is externally stored
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@param i - column ordinal value
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_column(
+		rec_t*		rec,
+		const ulint*	offsets,
+		ulint		i) UNIV_NOTHROW;
+
+	/**
+	Adjusts the BLOB reference in the clustered index row for all
+	externally stored columns.
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_columns(
+		rec_t*		rec,
+		const ulint*	offsets) UNIV_NOTHROW;
+
+	/**
+	In the clustered index, adjust the BLOB pointers as needed.
+	Also update the BLOB reference, write the new space id.
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_ref(
+		rec_t*		rec,
+		const ulint*	offsets) UNIV_NOTHROW;
+
+	/**
+	Purge delete-marked records, only if it is possible to do
+	so without re-organising the B+tree.
+	@param offsets - current row offsets.
+	@retval true if purged */
+	bool	purge(const ulint* offsets) UNIV_NOTHROW;
+
+	/**
+	Adjust the BLOB references and sys fields for the current record.
+	@param index - the index being converted
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@param deleted - true if row is delete marked
+	@return DB_SUCCESS or error code. */
+	dberr_t	adjust_cluster_record(
+		const dict_index_t*	index,
+		rec_t*			rec,
+		const ulint*		offsets,
+		bool			deleted) UNIV_NOTHROW;
+
+	/**
+	Find an index with the matching id.
+	@return row_index_t* instance or 0 if the id is unknown */
+	row_index_t* find_index(index_id_t id) UNIV_NOTHROW
+	{
+		row_index_t*	index = &m_cfg->m_indexes[0];
+
+		for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) {
+			if (id == index->m_id) {
+				return(index);
+			}
+		}
+
+		return(0);
+
+	}
+private:
+	/** Config for table that is being imported. */
+	row_import*		m_cfg;
+
+	/** Current index whose pages are being imported */
+	row_index_t*		m_index;
+
+	/** Current system LSN */
+	lsn_t			m_current_lsn;
+
+	/** Alias for m_page_zip, only set for compressed pages. */
+	page_zip_des_t*		m_page_zip_ptr;
+
+	/** Iterator over records in a block */
+	RecIterator		m_rec_iter;
+
+	/** Record offsets scratch array, used via m_offsets */
+	ulint			m_offsets_[REC_OFFS_NORMAL_SIZE];
+
+	/** Pointer to m_offsets_ */
+	ulint*			m_offsets;
+
+	/** Memory heap for the record offsets */
+	mem_heap_t*		m_heap;
+
+	/** Cluster index instance */
+	dict_index_t*		m_cluster_index;
+};
+
+/**
+row_import destructor. Releases every heap allocation owned by the
+configuration: the per-index name and field arrays, the index array
+itself, the column array, the column names, the table name and the
+hostname. All members are allocated with new[] and may be 0. */
+row_import::~row_import() UNIV_NOTHROW
+{
+	if (m_indexes != 0) {
+		for (ulint i = 0; i < m_n_indexes; ++i) {
+			row_index_t*	cfg_index = &m_indexes[i];
+
+			delete [] cfg_index->m_name;
+
+			if (cfg_index->m_fields == 0) {
+				continue;
+			}
+
+			for (ulint j = 0; j < cfg_index->m_n_fields; ++j) {
+				delete [] cfg_index->m_fields[j].name;
+			}
+
+			delete [] cfg_index->m_fields;
+		}
+	}
+
+	if (m_col_names != 0) {
+		for (ulint i = 0; i < m_n_cols; ++i) {
+			delete [] m_col_names[i];
+		}
+	}
+
+	delete [] m_cols;
+	delete [] m_indexes;
+	delete [] m_col_names;
+	delete [] m_table_name;
+	delete [] m_hostname;
+}
+
+/**
+Find the index entry in the indexes array by name.
+@param name - index name
+@return instance if found else 0. */
+row_index_t*
+row_import::get_index(
+	const char*	name) const UNIV_NOTHROW
+{
+	row_index_t*	cfg_index = m_indexes;
+
+	for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) {
+
+		const char*	cfg_name = reinterpret_cast<const char*>(
+			cfg_index->m_name);
+
+		if (strcmp(name, cfg_name) == 0) {
+			return(cfg_index);
+		}
+	}
+
+	/* Not found. */
+	return(0);
+}
+
+/**
+Get the number of rows in the index.
+@param name - index name; must name an index present in the meta-data
+@return number of rows (doesn't include delete marked rows). */
+ulint
+row_import::get_n_rows(
+	const char*	name) const UNIV_NOTHROW
+{
+	const row_index_t*	index = get_index(name);
+
+	/* Assert on the looked-up index, not the caller's name pointer:
+	get_index() returns 0 for an unknown name and we dereference the
+	result below. (The old assertion checked the wrong variable.) */
+	ut_a(index != 0);
+
+	return(index->m_stats.m_n_rows);
+}
+
+/**
+Get the number of rows for which purge failed during the convert phase.
+@param name - index name; must name an index present in the meta-data
+@return number of rows for which purge failed. */
+ulint
+row_import::get_n_purge_failed(
+	const char*	name) const UNIV_NOTHROW
+{
+	const row_index_t*	index = get_index(name);
+
+	/* Assert on the looked-up index, not the caller's name pointer:
+	get_index() returns 0 for an unknown name and we dereference the
+	result below. (The old assertion checked the wrong variable.) */
+	ut_a(index != 0);
+
+	return(index->m_stats.m_n_purge_failed);
+}
+
+/**
+Find the ordinal value of the column name in the cfg table columns.
+@param name - of column to look for.
+@return ordinal position, or ULINT_UNDEFINED if not found. */
+ulint
+row_import::find_col(
+	const char*	name) const UNIV_NOTHROW
+{
+	ulint	i = 0;
+
+	while (i < m_n_cols) {
+
+		const char*	col_name = reinterpret_cast<const char*>(
+			m_col_names[i]);
+
+		if (strcmp(name, col_name) == 0) {
+			return(i);
+		}
+
+		++i;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**
+Find the index field entry in the cfg index fields.
+@param cfg_index - meta-data index whose fields are searched
+@param name - name of the field to look for
+@return instance if found else 0. */
+const dict_field_t*
+row_import::find_field(
+	const row_index_t*	cfg_index,
+	const char*		name) const UNIV_NOTHROW
+{
+	for (ulint i = 0; i < cfg_index->m_n_fields; ++i) {
+
+		const dict_field_t*	field = &cfg_index->m_fields[i];
+
+		const char*	field_name = reinterpret_cast<const char*>(
+			field->name);
+
+		if (strcmp(name, field_name) == 0) {
+			return(field);
+		}
+	}
+
+	/* Not found. */
+	return(0);
+}
+
+/**
+Check if the index schema that was read from the .cfg file matches the
+in memory index definition. All mismatches are reported before returning,
+so the user sees every problem in one pass.
+@param thd - MySQL session, used for error reporting
+@param index - server-side index definition to compare against
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_index_columns(
+	THD*			thd,
+	const dict_index_t*	index) UNIV_NOTHROW
+{
+	row_index_t*	cfg_index;
+	dberr_t		err = DB_SUCCESS;
+
+	cfg_index = get_index(index->name);
+
+	if (cfg_index == 0) {
+		ib_errf(thd, IB_LOG_LEVEL_ERROR,
+			 ER_TABLE_SCHEMA_MISMATCH,
+			 "Index %s not found in tablespace meta-data file.",
+			 index->name);
+
+		return(DB_ERROR);
+	}
+
+	/* Remember the server index so later phases can map meta-data
+	entries back to the dictionary object. */
+	cfg_index->m_srv_index = index;
+
+	const dict_field_t*	field = index->fields;
+
+	for (ulint i = 0; i < index->n_fields; ++i, ++field) {
+
+		const dict_field_t*	cfg_field;
+
+		cfg_field = find_field(cfg_index, field->name);
+
+		if (cfg_field == 0) {
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Index %s field %s not found in tablespace "
+				 "meta-data file.",
+				 index->name, field->name);
+
+			err = DB_ERROR;
+		} else {
+
+			if (cfg_field->prefix_len != field->prefix_len) {
+				ib_errf(thd, IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Index %s field %s prefix len %lu "
+					 "doesn't match meta-data file value "
+					 "%lu",
+					 index->name, field->name,
+					 (ulong) field->prefix_len,
+					 (ulong) cfg_field->prefix_len);
+
+				err = DB_ERROR;
+			}
+
+			if (cfg_field->fixed_len != field->fixed_len) {
+				ib_errf(thd, IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Index %s field %s fixed len %lu "
+					 "doesn't match meta-data file value "
+					 "%lu",
+					 index->name, field->name,
+					 (ulong) field->fixed_len,
+					 (ulong) cfg_field->fixed_len);
+
+				err = DB_ERROR;
+			}
+		}
+	}
+
+	return(err);
+}
+
+/**
+Check if the table schema that was read from the .cfg file matches the
+in memory table definition. Every column attribute mismatch is reported
+individually; the function keeps scanning so all errors are surfaced.
+@param thd - MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_table_columns(
+	THD*			thd) UNIV_NOTHROW
+{
+	dberr_t			err = DB_SUCCESS;
+	const dict_col_t*	col = m_table->cols;
+
+	for (ulint i = 0; i < m_table->n_cols; ++i, ++col) {
+
+		const char*	col_name;
+		ulint		cfg_col_index;
+
+		col_name = dict_table_get_col_name(
+			m_table, dict_col_get_no(col));
+
+		cfg_col_index = find_col(col_name);
+
+		if (cfg_col_index == ULINT_UNDEFINED) {
+
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Column %s not found in tablespace.",
+				 col_name);
+
+			err = DB_ERROR;
+		} else if (cfg_col_index != col->ind) {
+
+			/* Same column name but at a different ordinal
+			position: the physical row layout differs. */
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Column %s ordinal value mismatch, it's at "
+				 "%lu in the table and %lu in the tablespace "
+				 "meta-data file",
+				 col_name,
+				 (ulong) col->ind, (ulong) cfg_col_index);
+
+			err = DB_ERROR;
+		} else {
+			const dict_col_t*	cfg_col;
+
+			cfg_col = &m_cols[cfg_col_index];
+			ut_a(cfg_col->ind == cfg_col_index);
+
+			if (cfg_col->prtype != col->prtype) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s precise type mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->mtype != col->mtype) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s main type mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->len != col->len) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s length mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->mbminmaxlen != col->mbminmaxlen) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s multi-byte len mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			/* Defensive re-check; unreachable unless the
+			meta-data is internally inconsistent, hence no
+			user message. */
+			if (cfg_col->ind != col->ind) {
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->ord_part != col->ord_part) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s ordering mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->max_prefix != col->max_prefix) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s max prefix mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+		}
+	}
+
+	return(err);
+}
+
+/**
+Check if the table (and index) schema that was read from the .cfg file
+matches the in memory table definition.
+@param thd - MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_schema(
+	THD*		thd) UNIV_NOTHROW
+{
+	/* Do some simple checks. */
+
+	if (m_flags != m_table->flags) {
+		/* BUG FIX: the message printed m_table->n_cols where it
+		claims to print the server table flags. */
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Table flags don't match, server table has 0x%lx "
+			 "and the meta-data file has 0x%lx",
+			 (ulong) m_table->flags, (ulong) m_flags);
+
+		return(DB_ERROR);
+	} else if (m_table->n_cols != m_n_cols) {
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Number of columns don't match, table has %lu "
+			 "columns but the tablespace meta-data file has "
+			 "%lu columns",
+			 (ulong) m_table->n_cols, (ulong) m_n_cols);
+
+		return(DB_ERROR);
+	} else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+		/* If the number of indexes don't match then it is better
+		to abort the IMPORT. It is easy for the user to create a
+		table matching the IMPORT definition. */
+
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Number of indexes don't match, table has %lu "
+			 "indexes but the tablespace meta-data file has "
+			 "%lu indexes",
+			 (ulong) UT_LIST_GET_LEN(m_table->indexes),
+			 (ulong) m_n_indexes);
+
+		return(DB_ERROR);
+	}
+
+	dberr_t	err = match_table_columns(thd);
+
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* Check if the index definitions match. Keep going after a
+	mismatch so every bad index is reported. */
+
+	const dict_index_t* index;
+
+	for (index = UT_LIST_GET_FIRST(m_table->indexes);
+	     index != 0;
+	     index = UT_LIST_GET_NEXT(indexes, index)) {
+
+		dberr_t	index_err;
+
+		index_err = match_index_columns(thd, index);
+
+		if (index_err != DB_SUCCESS) {
+			err = index_err;
+		}
+	}
+
+	return(err);
+}
+
+/**
+Set the index root <space, pageno>, using index name. Requires that
+every meta-data index name has already been validated against the
+dictionary (see match_index_columns()). */
+void
+row_import::set_root_by_name() UNIV_NOTHROW
+{
+	for (ulint i = 0; i < m_n_indexes; ++i) {
+
+		row_index_t*	cfg_index = &m_indexes[i];
+
+		const char*	index_name = reinterpret_cast<const char*>(
+			cfg_index->m_name);
+
+		dict_index_t*	index = dict_table_get_index_on_name(
+			m_table, index_name);
+
+		/* We've already checked that it exists. */
+		ut_a(index != 0);
+
+		/* Set the root page number and space id. */
+		index->space = m_table->space;
+		index->page = cfg_index->m_page_no;
+	}
+}
+
+/**
+Set the index root <space, pageno>, using a heuristic. Used when no .cfg
+file is available: dictionary indexes are paired with the root pages that
+were discovered during the tablespace scan, in order. FTS indexes are
+skipped and flagged corrupt since they cannot be matched this way.
+@return DB_SUCCESS or error code */
+dberr_t
+row_import::set_root_by_heuristic() UNIV_NOTHROW
+{
+	row_index_t*	cfg_index = m_indexes;
+
+	ut_a(m_n_indexes > 0);
+
+	// TODO: For now use brute force, based on ordinality
+
+	if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+		char	table_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			table_name, sizeof(table_name), m_table->name, FALSE);
+
+		/* Only a warning: the pairing below still proceeds for
+		as many indexes as both sides have. */
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Table %s should have %lu indexes but the tablespace "
+			"has %lu indexes",
+			table_name,
+			UT_LIST_GET_LEN(m_table->indexes),
+			m_n_indexes);
+	}
+
+	/* Protect the dictionary objects while we update them. */
+	dict_mutex_enter_for_mysql();
+
+	ulint	i = 0;
+	dberr_t	err = DB_SUCCESS;
+
+	for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
+	     index != 0;
+	     index = UT_LIST_GET_NEXT(indexes, index)) {
+
+		if (index->type & DICT_FTS) {
+			index->type |= DICT_CORRUPT;
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"Skipping FTS index: %s", index->name);
+		} else if (i < m_n_indexes) {
+
+			/* Replace the synthesized meta-data name with the
+			real dictionary index name. */
+			delete [] cfg_index[i].m_name;
+
+			ulint	len = strlen(index->name) + 1;
+
+			cfg_index[i].m_name = new(std::nothrow) byte[len];
+
+			/* Trigger OOM */
+			DBUG_EXECUTE_IF("ib_import_OOM_14",
+					delete[] cfg_index[i].m_name;
+					cfg_index[i].m_name = 0;);
+
+			if (cfg_index[i].m_name == 0) {
+				err = DB_OUT_OF_MEMORY;
+				break;
+			}
+
+			memcpy(cfg_index[i].m_name, index->name, len);
+
+			cfg_index[i].m_srv_index = index;
+
+			index->space = m_table->space;
+			index->page = cfg_index[i].m_page_no;
+
+			++i;
+		}
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	return(err);
+}
+
+/**
+Purge delete marked records. Walks the whole index with a persistent
+cursor, counting live rows and purging delete-marked ones.
+@return DB_SUCCESS or error code. */
+dberr_t
+IndexPurge::garbage_collect() UNIV_NOTHROW
+{
+	dberr_t	err;
+	ibool	comp = dict_table_is_comp(m_index->table);
+
+	/* Open the persistent cursor and start the mini-transaction. */
+
+	open();
+
+	while ((err = next()) == DB_SUCCESS) {
+
+		rec_t*	rec = btr_pcur_get_rec(&m_pcur);
+		ibool	deleted = rec_get_deleted_flag(rec, comp);
+
+		if (!deleted) {
+			++m_n_rows;
+		} else {
+			purge();
+		}
+	}
+
+	/* Close the persistent cursor and commit the mini-transaction. */
+
+	close();
+
+	/* DB_END_OF_INDEX is the normal termination condition. */
+	return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
+}
+
+/**
+Begin import, position the cursor on the first record. Redo logging is
+disabled: the whole tablespace is flushed/discarded on failure anyway. */
+void
+IndexPurge::open() UNIV_NOTHROW
+{
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_open_at_index_side(
+		true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
+}
+
+/**
+Close the persistent cursor and commit the mini-transaction. */
+void
+IndexPurge::close() UNIV_NOTHROW
+{
+	btr_pcur_close(&m_pcur);
+	mtr_commit(&m_mtr);
+}
+
+/**
+Position the cursor on the next record. When crossing a page boundary
+the mini-transaction is committed and restarted so the latch on the old
+page is released; the cursor position is saved and restored around that.
+@return DB_SUCCESS, DB_END_OF_INDEX or DB_INTERRUPTED */
+dberr_t
+IndexPurge::next() UNIV_NOTHROW
+{
+	btr_pcur_move_to_next_on_page(&m_pcur);
+
+	/* When switching pages, commit the mini-transaction
+	in order to release the latch on the old page. */
+
+	if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
+		return(DB_SUCCESS);
+	} else if (trx_is_interrupted(m_trx)) {
+		/* Check after every page because the check
+		is expensive. */
+		return(DB_INTERRUPTED);
+	}
+
+	btr_pcur_store_position(&m_pcur, &m_mtr);
+
+	mtr_commit(&m_mtr);
+
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+
+	if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
+
+		return(DB_END_OF_INDEX);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Store the persistent cursor position and reopen the
+B-tree cursor in BTR_MODIFY_TREE mode, because the
+tree structure may be changed during a pessimistic delete.
+The caller is expected to have stored the cursor position. */
+void
+IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
+{
+	dberr_t	err;
+
+	btr_pcur_restore_position(BTR_MODIFY_TREE, &m_pcur, &m_mtr);
+
+	/* Only delete-marked records may be purged. */
+	ut_ad(rec_get_deleted_flag(
+			btr_pcur_get_rec(&m_pcur),
+			dict_table_is_comp(m_index->table)));
+
+	btr_cur_pessimistic_delete(
+		&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, RB_NONE, &m_mtr);
+
+	ut_a(err == DB_SUCCESS);
+
+	/* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */
+	mtr_commit(&m_mtr);
+}
+
+/**
+Purge delete-marked records. Saves the cursor position, performs the
+pessimistic delete (which commits the mtr), then restarts the mtr and
+restores the cursor in leaf mode to continue the scan. */
+void
+IndexPurge::purge() UNIV_NOTHROW
+{
+	btr_pcur_store_position(&m_pcur, &m_mtr);
+
+	purge_pessimistic_delete();
+
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+}
+
+/**
+Constructor
+* @param cfg - config of table being imported.
+* @param trx - transaction covering the import */
+PageConverter::PageConverter(
+	row_import*	cfg,
+	trx_t*		trx)
+	:
+	AbstractCallback(trx),
+	m_cfg(cfg),
+	m_page_zip_ptr(0),
+	m_heap(0) UNIV_NOTHROW
+{
+	/* Start with the first meta-data index; updated on the fly as
+	pages with other index ids are encountered. */
+	m_index = m_cfg->m_indexes;
+
+	/* Snapshot the current LSN; stamped into every page header. */
+	m_current_lsn = log_get_lsn();
+	ut_a(m_current_lsn > 0);
+
+	m_offsets = m_offsets_;
+	rec_offs_init(m_offsets_);
+
+	/* The first index of the table is the clustered index. */
+	m_cluster_index = dict_table_get_first_index(m_cfg->m_table);
+}
+
+/**
+Adjust the BLOB reference for a single column that is externally stored:
+overwrite the space id stored in the external field reference with the
+destination tablespace's space id.
+@param rec - record to update
+@param offsets - column offsets for the record
+@param i - column ordinal value
+@return DB_SUCCESS or DB_CORRUPTION if the reference is too short */
+dberr_t
+PageConverter::adjust_cluster_index_blob_column(
+	rec_t*		rec,
+	const ulint*	offsets,
+	ulint		i) UNIV_NOTHROW
+{
+	ulint	len;
+	byte*	field;
+
+	field = rec_get_nth_field(rec, offsets, i, &len);
+
+	DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
+			len = BTR_EXTERN_FIELD_REF_SIZE - 1;);
+
+	/* An externally stored column must end in a full-size
+	external field reference. */
+	if (len < BTR_EXTERN_FIELD_REF_SIZE) {
+
+		char index_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			index_name, sizeof(index_name),
+			m_cluster_index->name, TRUE);
+
+		ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+			ER_INNODB_INDEX_CORRUPT,
+			"Externally stored column(%lu) has a reference "
+			"length of %lu in the cluster index %s",
+			(ulong) i, (ulong) len, index_name);
+
+		return(DB_CORRUPTION);
+	}
+
+	/* Point at the BTR_EXTERN_SPACE_ID slot of the field
+	reference, which sits at the end of the stored column. */
+	field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
+
+	if (is_compressed_table()) {
+		mach_write_to_4(field, get_space_id());
+
+		page_zip_write_blob_ptr(
+			m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
+	} else {
+		/* mtr handle is 0 because redo logging is skipped
+		during import. */
+		mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Adjusts the BLOB reference in the clustered index row for all externally
+stored columns, rewriting the space_id in each BLOB pointer.
+@param rec - record to update
+@param offsets - column offsets for the record
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::adjust_cluster_index_blob_columns(
+	rec_t*		rec,
+	const ulint*	offsets) UNIV_NOTHROW
+{
+	ut_ad(rec_offs_any_extern(offsets));
+
+	ulint	n_fields = rec_offs_n_fields(offsets);
+
+	/* Adjust the space_id in the BLOB pointers. */
+
+	for (ulint i = 0; i < n_fields; ++i) {
+
+		/* Only columns stored "externally" carry a BLOB ref. */
+
+		if (!rec_offs_nth_extern(offsets, i)) {
+			continue;
+		}
+
+		dberr_t	err = adjust_cluster_index_blob_column(
+			rec, offsets, i);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+In the clustered index, adjust BLOB pointers as needed. Also update the
+BLOB reference, write the new space id. A no-op for records without
+externally stored columns.
+@param rec - record to update
+@param offsets - column offsets for the record
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::adjust_cluster_index_blob_ref(
+	rec_t*		rec,
+	const ulint*	offsets) UNIV_NOTHROW
+{
+	if (!rec_offs_any_extern(offsets)) {
+		/* No externally stored columns: nothing to fix up. */
+		return(DB_SUCCESS);
+	}
+
+	return(adjust_cluster_index_blob_columns(rec, offsets));
+}
+
+/**
+Purge delete-marked records, only if it is possible to do so without
+re-organising the B+tree.
+@param offsets - current row offsets (unused; m_offsets is used instead).
+@return true if purge succeeded */
+bool
+PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
+{
+	const dict_index_t*	index = m_index->m_srv_index;
+
+	/* We can't have a page that is empty and not root. */
+	if (!m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) {
+
+		++m_index->m_stats.m_n_purge_failed;
+
+		return(false);
+	}
+
+	++m_index->m_stats.m_n_purged;
+
+	return(true);
+}
+
+/**
+Adjust the BLOB references and sys fields for the current record.
+@param index - the index being converted (currently unused here)
+@param rec - record to update
+@param offsets - column offsets for the record
+@param deleted - true if row is delete marked (currently unused here)
+@return DB_SUCCESS or error code. */
+dberr_t
+PageConverter::adjust_cluster_record(
+	const dict_index_t*	index,
+	rec_t*			rec,
+	const ulint*		offsets,
+	bool			deleted) UNIV_NOTHROW
+{
+	dberr_t	err;
+
+	if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
+
+		/* Reset DB_TRX_ID and DB_ROLL_PTR.  Normally, these fields
+		are only written in conjunction with other changes to the
+		record. */
+
+		row_upd_rec_sys_fields(
+			rec, m_page_zip_ptr, m_cluster_index, m_offsets,
+			m_trx, 0);
+	}
+
+	return(err);
+}
+
+/**
+Update the BLOB references and write UNDO log entries for
+rows that can't be purged optimistically. Iterates over the user
+records on the page, adjusting clustered-index records and purging
+delete-marked rows where possible.
+@param block - block to update
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_records(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	ibool	comp = dict_table_is_comp(m_cfg->m_table);
+	bool	clust_index = m_index->m_srv_index == m_cluster_index;
+
+	/* This will also position the cursor on the first user record. */
+
+	m_rec_iter.open(block);
+
+	while (!m_rec_iter.end()) {
+
+		rec_t*	rec = m_rec_iter.current();
+
+		/* FIXME: Move out of the loop */
+
+		/* Node pointer pages have no user data to convert. */
+		if (rec_get_status(rec) == REC_STATUS_NODE_PTR) {
+			break;
+		}
+
+		ibool	deleted = rec_get_deleted_flag(rec, comp);
+
+		/* For the clustered index we have to adjust the BLOB
+		reference and the system fields irrespective of the
+		delete marked flag. The adjustment of delete marked
+		cluster records is required for purge to work later. */
+
+		if (deleted || clust_index) {
+			m_offsets = rec_get_offsets(
+				rec, m_index->m_srv_index, m_offsets,
+				ULINT_UNDEFINED, &m_heap);
+		}
+
+		if (clust_index) {
+
+			dberr_t	err = adjust_cluster_record(
+				m_index->m_srv_index, rec, m_offsets,
+				deleted);
+
+			if (err != DB_SUCCESS) {
+				return(err);
+			}
+		}
+
+		/* If it is a delete marked record then try an
+		optimistic delete. */
+
+		if (deleted) {
+			/* A successful purge will move the cursor to the
+			next record. */
+
+			if (!purge(m_offsets)) {
+				m_rec_iter.next();
+			}
+
+			++m_index->m_stats.m_n_deleted;
+		} else {
+			++m_index->m_stats.m_n_rows;
+			m_rec_iter.next();
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Update the space, index id, trx id. Switches m_index when the page
+belongs to a different index than the previous one.
+@param block - block to convert
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_index_page(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	index_id_t	id;
+	buf_frame_t*	page = block->frame;
+
+	if (is_free(buf_block_get_page_no(block))) {
+		/* Page is on the free list: nothing to convert. */
+		return(DB_SUCCESS);
+	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
+
+		row_index_t*	index = find_index(id);
+
+		if (index == 0) {
+			m_index = 0;
+			return(DB_CORRUPTION);
+		}
+
+		/* Update current index */
+		m_index = index;
+	}
+
+	/* If the .cfg file is missing and there is an index mismatch
+	then ignore the error. */
+	if (m_cfg->m_missing && (m_index == 0 || m_index->m_srv_index == 0)) {
+		return(DB_SUCCESS);
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!is_compressed_table()
+	     || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index));
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* This has to be written to uncompressed index header. Set it to
+	the current index id. */
+	btr_page_set_index_id(
+		page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
+
+	page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
+
+	if (page_get_n_recs(block->frame) == 0) {
+
+		/* Only a root page can be empty. */
+		if (!is_root_page(block->frame)) {
+			// TODO: We should relax this and skip secondary
+			// indexes. Mark them as corrupt because they can
+			// always be rebuilt.
+			return(DB_CORRUPTION);
+		}
+
+		return(DB_SUCCESS);
+	}
+
+	return(update_records(block));
+}
+
+/**
+Validate the space flags and update tablespace header page: stamp the
+flush LSN and rewrite the space id in the FSP header and page header.
+@param block - block read from file, not from the buffer pool.
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_header(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	/* Check for valid header */
+	switch(fsp_header_get_space_id(get_frame(block))) {
+	case 0:
+		return(DB_CORRUPTION);
+	case ULINT_UNDEFINED:
+		/* Deliberately non-fatal; we overwrite the space id
+		below anyway. */
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Space id check in the header failed "
+			"- ignored");
+	}
+
+	ulint	space_flags = fsp_header_get_flags(get_frame(block));
+
+	if (!fsp_flags_is_valid(space_flags)) {
+
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Unsupported tablespace format %lu",
+			(ulong) space_flags);
+
+		return(DB_UNSUPPORTED);
+	}
+
+	mach_write_to_8(
+		get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN, m_current_lsn);
+
+	/* Write space_id to the tablespace header, page 0. */
+	mach_write_to_4(
+		get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID,
+		get_space_id());
+
+	/* This is on every page in the tablespace. */
+	mach_write_to_4(
+		get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+		get_space_id());
+
+	return(DB_SUCCESS);
+}
+
+/**
+Update the page, set the space id, max trx id and index id.
+Dispatches on the page type; unknown types are treated as corruption.
+@param block - block read from file
+@param page_type - out: type of the page, as read from the frame
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_page(
+	buf_block_t*	block,
+	ulint&		page_type) UNIV_NOTHROW
+{
+	dberr_t		err = DB_SUCCESS;
+
+	switch (page_type = fil_page_get_type(get_frame(block))) {
+	case FIL_PAGE_TYPE_FSP_HDR:
+		/* Work directly on the uncompressed page headers. */
+		ut_a(buf_block_get_page_no(block) == 0);
+		return(update_header(block));
+
+	case FIL_PAGE_INDEX:
+		/* We need to decompress the contents into block->frame
+		before we can do any thing with Btree pages. */
+
+		if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
+			return(DB_CORRUPTION);
+		}
+
+		/* This is on every page in the tablespace. */
+		mach_write_to_4(
+			get_frame(block)
+			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+		/* Only update the Btree nodes. */
+		return(update_index_page(block));
+
+	case FIL_PAGE_TYPE_SYS:
+		/* This is page 0 in the system tablespace. */
+		return(DB_CORRUPTION);
+
+	case FIL_PAGE_TYPE_XDES:
+		err = set_current_xdes(
+			buf_block_get_page_no(block), get_frame(block));
+		/* Fall through: XDES pages also get the space id below. */
+	case FIL_PAGE_INODE:
+	case FIL_PAGE_TYPE_TRX_SYS:
+	case FIL_PAGE_IBUF_FREE_LIST:
+	case FIL_PAGE_TYPE_ALLOCATED:
+	case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_TYPE_BLOB:
+	case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB2:
+
+		/* Work directly on the uncompressed page headers. */
+		/* This is on every page in the tablespace. */
+		mach_write_to_4(
+			get_frame(block)
+			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+		return(err);
+	}
+
+	ib_logf(IB_LOG_LEVEL_WARN, "Unknown page type (%lu)", page_type);
+
+	return(DB_CORRUPTION);
+}
+
+/**
+Validate the page
+@param offset - physical offset within file.
+@param page - page read from file.
+@return status */
+PageConverter::import_page_status_t
+PageConverter::validate(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW
+{
+ buf_frame_t* page = get_frame(block);
+
+ /* Check that the page number corresponds to the offset in
+ the file. Flag as corrupt if it doesn't. Disable the check
+ for LSN in buf_page_is_corrupted() */
+
+ if (buf_page_is_corrupted(false, page, get_zip_size())
+ || (page_get_page_no(page) != offset / m_page_size
+ && page_get_page_no(page) != 0)) {
+
+ return(IMPORT_PAGE_STATUS_CORRUPTED);
+
+ } else if (offset > 0 && page_get_page_no(page) == 0) {
+ const byte* b = page;
+ const byte* e = b + m_page_size;
+
+ /* If the page number is zero and offset > 0 then
+ the entire page MUST consist of zeroes. If not then
+ we flag it as corrupt. */
+
+ while (b != e) {
+
+ if (*b++ && !trigger_corruption()) {
+ return(IMPORT_PAGE_STATUS_CORRUPTED);
+ }
+ }
+
+ /* The page is all zero: do nothing. */
+ return(IMPORT_PAGE_STATUS_ALL_ZERO);
+ }
+
+ return(IMPORT_PAGE_STATUS_OK);
+}
+
/**
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
@param offset - physical offset within the file
@param block - block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
PageConverter::operator() (
	os_offset_t	offset,
	buf_block_t*	block) UNIV_NOTHROW
{
	ulint		page_type;
	dberr_t		err = DB_SUCCESS;

	/* Allows a long-running import to be interrupted. */
	if ((err = periodic_check()) != DB_SUCCESS) {
		return(err);
	}

	if (is_compressed_table()) {
		m_page_zip_ptr = &block->page.zip;
	} else {
		ut_ad(m_page_zip_ptr == 0);
	}

	switch(validate(offset, block)) {
	case IMPORT_PAGE_STATUS_OK:

		/* We have to decompress the compressed pages before
		we can work on them */

		/* page_type is set by update_page() before it is read
		below. */
		if ((err = update_page(block, page_type)) != DB_SUCCESS) {
			return(err);
		}

		/* Note: For compressed pages this function will write to the
		zip descriptor and for uncompressed pages it will write to
		page (ie. the block->frame). Therefore the caller should write
		out the descriptor contents and not block->frame for compressed
		pages. */

		if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {

			buf_flush_init_for_writing(
				!is_compressed_table()
				? block->frame : block->page.zip.data,
				!is_compressed_table() ? 0 : m_page_zip_ptr,
				m_current_lsn);
		} else {
			/* Calculate and update the checksum of non-btree
			pages for compressed tables explicitly here. */

			buf_flush_update_zip_checksum(
				get_frame(block), get_zip_size(),
				m_current_lsn);
		}

		break;

	case IMPORT_PAGE_STATUS_ALL_ZERO:
		/* The page is all zero: leave it as is. */
		break;

	case IMPORT_PAGE_STATUS_CORRUPTED:

		ib_logf(IB_LOG_LEVEL_WARN,
			"%s: Page %lu at offset " UINT64PF " looks corrupted.",
			m_filepath, (ulong) (offset / m_page_size), offset);

		return(DB_CORRUPTION);
	}

	return(err);
}
+
/*****************************************************************//**
Clean up after import tablespace failure, this function will acquire
the dictionary latches on behalf of the transaction if the transaction
hasn't already acquired them. */
static	__attribute__((nonnull))
void
row_import_discard_changes(
/*=======================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from handler */
	trx_t*		trx,		/*!< in/out: transaction for import */
	dberr_t		err)		/*!< in: error code */
{
	dict_table_t*	table = prebuilt->table;

	/* Only called on failure paths. */
	ut_a(err != DB_SUCCESS);

	prebuilt->trx->error_info = NULL;

	char	table_name[MAX_FULL_NAME_LEN + 1];

	innobase_format_name(
		table_name, sizeof(table_name),
		prebuilt->table->name, FALSE);

	ib_logf(IB_LOG_LEVEL_INFO,
		"Discarding tablespace of table %s: %s",
		table_name, ut_strerr(err));

	/* Take the dictionary X-latch on behalf of the transaction
	if it does not hold it yet; it must hold it exclusively below. */
	if (trx->dict_operation_lock_mode != RW_X_LATCH) {
		ut_a(trx->dict_operation_lock_mode == 0);
		row_mysql_lock_data_dictionary(trx);
	}

	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

	/* Since we update the index root page numbers on disk after
	we've done a successful import. The table will not be loadable.
	However, we need to ensure that the in memory root page numbers
	are reset to "NULL". */

	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
		index != 0;
		index = UT_LIST_GET_NEXT(indexes, index)) {

		index->page = FIL_NULL;
		index->space = FIL_NULL;
	}

	table->ibd_file_missing = TRUE;

	fil_close_tablespace(trx, table->space);
}
+
/*****************************************************************//**
Clean up after import tablespace: discard changes on failure, then
commit the import transaction, release the dictionary latch and
force a log checkpoint.
@return the error code passed in */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_cleanup(
/*===============*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from handler */
	trx_t*		trx,		/*!< in/out: transaction for import */
	dberr_t		err)		/*!< in: error code */
{
	/* The import transaction is separate from the session's. */
	ut_a(prebuilt->trx != trx);

	if (err != DB_SUCCESS) {
		/* Roll back the in-memory state of the import;
		also acquires the dictionary X-latch if needed. */
		row_import_discard_changes(prebuilt, trx, err);
	}

	/* Either the caller or row_import_discard_changes() must
	have X-latched the data dictionary by now. */
	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

	DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););

	trx_commit_for_mysql(trx);

	row_mysql_unlock_data_dictionary(trx);

	trx_free_for_mysql(trx);

	prebuilt->trx->op_info = "";

	DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););

	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);

	return(err);
}
+
+/*****************************************************************//**
+Report error during tablespace import. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_error(
+/*=============*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
+ trx_t* trx, /*!< in/out: transaction for import */
+ dberr_t err) /*!< in: error code */
+{
+ if (!trx_is_interrupted(trx)) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ prebuilt->table->name, FALSE);
+
+ ib_senderrf(
+ trx->mysql_thd, IB_LOG_LEVEL_WARN,
+ ER_INNODB_IMPORT_ERROR,
+ table_name, (ulong) err, ut_strerr(err));
+ }
+
+ return(row_import_cleanup(prebuilt, trx, err));
+}
+
/*****************************************************************//**
Adjust the root page index node and leaf node segment headers, update
with the new space id. For all the table's secondary indexes.
@return error code */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_adjust_root_pages_of_secondary_indexes(
/*==============================================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from
					handler */
	trx_t*		trx,		/*!< in: transaction used for
					the import */
	dict_table_t*	table,		/*!< in: table the indexes
					belong to */
	const row_import&	cfg)	/*!< Import context */
{
	dict_index_t*	index;
	ulint		n_rows_in_table;
	dberr_t		err = DB_SUCCESS;

	/* Skip the clustered index. */
	index = dict_table_get_first_index(table);

	/* The clustered index row count is the reference count that
	every secondary index must match after purge. */
	n_rows_in_table = cfg.get_n_rows(index->name);

	DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
			n_rows_in_table++;);

	/* Adjust the root pages of the secondary indexes only. */
	while ((index = dict_table_get_next_index(index)) != NULL) {
		char	index_name[MAX_FULL_NAME_LEN + 1];

		innobase_format_name(
			index_name, sizeof(index_name), index->name, TRUE);

		ut_a(!dict_index_is_clust(index));

		if (!(index->type & DICT_CORRUPT)
		    && index->space != FIL_NULL
		    && index->page != FIL_NULL) {

			/* Update the Btree segment headers for index node and
			leaf nodes in the root page. Set the new space id. */

			err = btr_root_adjust_on_import(index);
		} else {
			ib_logf(IB_LOG_LEVEL_WARN,
				"Skip adjustment of root pages for "
				"index %s.", index->name);

			err = DB_CORRUPTION;
		}

		if (err != DB_SUCCESS) {

			/* NOTE(review): this branch looks unreachable,
			since the loop asserts !dict_index_is_clust()
			above — confirm before removing. */
			if (index->type & DICT_CLUSTERED) {
				break;
			}

			ib_errf(trx->mysql_thd,
				IB_LOG_LEVEL_WARN,
				ER_INNODB_INDEX_CORRUPT,
				"Index '%s' not found or corrupt, "
				"you should recreate this index.",
				index_name);

			/* Do not bail out, so that the data
			can be recovered. */

			err = DB_SUCCESS;
			index->type |= DICT_CORRUPT;
			continue;
		}

		/* If we failed to purge any records in the index then
		do it the hard way.

		TODO: We can do this in the first pass by generating UNDO log
		records for the failed rows. */

		if (!cfg.requires_purge(index->name)) {
			continue;
		}

		IndexPurge   purge(trx, index);

		trx->op_info = "secondary: purge delete marked records";

		err = purge.garbage_collect();

		trx->op_info = "";

		if (err != DB_SUCCESS) {
			break;
		} else if (purge.get_n_rows() != n_rows_in_table) {

			/* Row count mismatch with the clustered index:
			mark the index corrupt but keep importing. */
			ib_errf(trx->mysql_thd,
				IB_LOG_LEVEL_WARN,
				ER_INNODB_INDEX_CORRUPT,
				"Index '%s' contains %lu entries, "
				"should be %lu, you should recreate "
				"this index.", index_name,
				(ulong) purge.get_n_rows(),
				(ulong) n_rows_in_table);

			index->type |= DICT_CORRUPT;

			/* Do not bail out, so that the data
			can be recovered. */

			err = DB_SUCCESS;
		}
	}

	return(err);
}
+
/*****************************************************************//**
Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID). Reads the
last record of the clustered index (without redo logging) and bumps
the global row id counter if the imported table contains a larger one.
@return error code */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_set_sys_max_row_id(
/*==========================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from
					handler */
	const dict_table_t*	table)	/*!< in: table to import */
{
	dberr_t		err;
	const rec_t*	rec;
	mtr_t		mtr;
	btr_pcur_t	pcur;
	row_id_t	row_id	= 0;
	dict_index_t*	index;

	index = dict_table_get_first_index(table);
	ut_a(dict_index_is_clust(index));

	mtr_start(&mtr);

	/* Read-only scan: no redo needed. */
	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);

	/* Position the cursor after the last user record. */
	btr_pcur_open_at_index_side(
		false,		// High end
		index,
		BTR_SEARCH_LEAF,
		&pcur,
		true,		// Init cursor
		0,		// Leaf level
		&mtr);

	btr_pcur_move_to_prev_on_page(&pcur);
	rec = btr_pcur_get_rec(&pcur);

	/* Check for empty table. */
	if (!page_rec_is_infimum(rec)) {
		ulint		len;
		const byte*	field;
		mem_heap_t*	heap = NULL;
		ulint		offsets_[1 + REC_OFFS_HEADER_SIZE];
		ulint*		offsets;

		rec_offs_init(offsets_);

		offsets = rec_get_offsets(
			rec, index, offsets_, ULINT_UNDEFINED, &heap);

		/* Extract the DB_ROW_ID system column of the last record. */
		field = rec_get_nth_field(
			rec, offsets,
			dict_index_get_sys_col_pos(index, DATA_ROW_ID),
			&len);

		if (len == DATA_ROW_ID_LEN) {
			row_id = mach_read_from_6(field);
			err = DB_SUCCESS;
		} else {
			err = DB_CORRUPTION;
		}

		if (heap != NULL) {
			mem_heap_free(heap);
		}
	} else {
		/* The table is empty. */
		err = DB_SUCCESS;
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure",
			err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		char		index_name[MAX_FULL_NAME_LEN + 1];

		innobase_format_name(
			index_name, sizeof(index_name), index->name, TRUE);

		ib_errf(prebuilt->trx->mysql_thd,
			IB_LOG_LEVEL_WARN,
			ER_INNODB_INDEX_CORRUPT,
			"Index '%s' corruption detected, invalid DB_ROW_ID "
			"in index.", index_name);

		return(err);

	} else if (row_id > 0) {

		/* Update the system row id if the imported index row id is
		greater than the max system row id. */

		mutex_enter(&dict_sys->mutex);

		if (row_id >= dict_sys->row_id) {
			dict_sys->row_id = row_id + 1;
			dict_hdr_flush_row_id();
		}

		mutex_exit(&dict_sys->mutex);
	}

	return(DB_SUCCESS);
}
+
+/*****************************************************************//**
+Read the a string from the meta data file.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_cfg_read_string(
+/*=======================*/
+ FILE* file, /*!< in/out: File to read from */
+ byte* ptr, /*!< out: string to read */
+ ulint max_len) /*!< in: maximum length of the output
+ buffer in bytes */
+{
+ DBUG_EXECUTE_IF("ib_import_string_read_error",
+ errno = EINVAL; return(DB_IO_ERROR););
+
+ ulint len = 0;
+
+ while (!feof(file)) {
+ int ch = fgetc(file);
+
+ if (ch == EOF) {
+ break;
+ } else if (ch != 0) {
+ if (len < max_len) {
+ ptr[len++] = ch;
+ } else {
+ break;
+ }
+ /* max_len includes the NUL byte */
+ } else if (len != max_len - 1) {
+ break;
+ } else {
+ ptr[len] = 0;
+ return(DB_SUCCESS);
+ }
+ }
+
+ errno = EINVAL;
+
+ return(DB_IO_ERROR);
+}
+
/*********************************************************************//**
Read the meta data (index user fields) config file. Each field row is
[prefix_len, fixed_len, name_len] followed by the NUL-terminated name.
@return DB_SUCCESS or error code. */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_cfg_read_index_fields(
/*=============================*/
	FILE*		file,	/*!< in: file to read from */
	THD*		thd,	/*!< in/out: session */
	row_index_t*	index,	/*!< Index being read in */
	row_import*	cfg)	/*!< in/out: meta-data read */
{
	byte			row[sizeof(ib_uint32_t) * 3];
	ulint			n_fields = index->m_n_fields;

	index->m_fields = new(std::nothrow) dict_field_t[n_fields];

	/* Trigger OOM */
	DBUG_EXECUTE_IF("ib_import_OOM_4",
			delete [] index->m_fields; index->m_fields = 0;);

	if (index->m_fields == 0) {
		return(DB_OUT_OF_MEMORY);
	}

	dict_field_t*	field = index->m_fields;

	memset(field, 0x0, sizeof(*field) * n_fields);

	for (ulint i = 0; i < n_fields; ++i, ++field) {
		byte*		ptr = row;

		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error_1",
				(void) fseek(file, 0L, SEEK_END););

		if (fread(row, 1, sizeof(row), file) != sizeof(row)) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while reading index fields.");

			return(DB_IO_ERROR);
		}

		field->prefix_len = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		field->fixed_len = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		/* Include the NUL byte in the length. */
		ulint	len = mach_read_from_4(ptr);

		/* NOTE(review): len comes from the file and is not
		range-checked here, unlike the index-name length in
		row_import_read_index_data() — confirm the writer
		bounds it. */
		byte*	name = new(std::nothrow) byte[len];

		/* Trigger OOM */
		DBUG_EXECUTE_IF("ib_import_OOM_5", delete [] name; name = 0;);

		if (name == 0) {
			return(DB_OUT_OF_MEMORY);
		}

		/* Ownership of the buffer passes to the field;
		freed with the row_import meta-data. */
		field->name = reinterpret_cast<const char*>(name);

		dberr_t	err = row_import_cfg_read_string(file, name, len);

		if (err != DB_SUCCESS) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while parsing table name.");

			return(err);
		}
	}

	return(DB_SUCCESS);
}
+
/*****************************************************************//**
Read the index names and root page numbers of the indexes and set the values.
Row format [root_page_no, len of str, str ... ]
@return DB_SUCCESS or error code. */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_read_index_data(
/*=======================*/
	FILE*		file,	/*!< in: File to read from */
	THD*		thd,	/*!< in: session */
	row_import*	cfg)	/*!< in/out: meta-data read */
{
	byte*		ptr;
	row_index_t*	cfg_index;
	/* One serialised index record: 64-bit id + nine 32-bit fields. */
	byte		row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9];

	/* FIXME: What is the max value? */
	ut_a(cfg->m_n_indexes > 0);
	ut_a(cfg->m_n_indexes < 1024);

	cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];

	/* Trigger OOM */
	DBUG_EXECUTE_IF("ib_import_OOM_6",
			delete [] cfg->m_indexes; cfg->m_indexes = 0;);

	if (cfg->m_indexes == 0) {
		return(DB_OUT_OF_MEMORY);
	}

	memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);

	cfg_index = cfg->m_indexes;

	for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) {
		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error_2",
				(void) fseek(file, 0L, SEEK_END););

		/* Read the index data. */
		size_t	n_bytes = fread(row, 1, sizeof(row), file);

		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error",
				(void) fseek(file, 0L, SEEK_END););

		if (n_bytes != sizeof(row)) {
			char	msg[BUFSIZ];

			ut_snprintf(msg, sizeof(msg),
				    "while reading index meta-data, expected "
				    "to read %lu bytes but read only %lu "
				    "bytes",
				    (ulong) sizeof(row), (ulong) n_bytes);

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno), msg);

			ib_logf(IB_LOG_LEVEL_ERROR, "IO Error: %s", msg);

			return(DB_IO_ERROR);
		}

		ptr = row;

		cfg_index->m_id = mach_read_from_8(ptr);
		ptr += sizeof(index_id_t);

		cfg_index->m_space = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_page_no = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_type = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
		/* Detect narrowing: if m_trx_id_offset cannot hold the
		32-bit value read, the re-read comparison fails. */
		if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
			ut_ad(0);
			/* Overflow. Pretend that the clustered index
			has a variable-length PRIMARY KEY. */
			cfg_index->m_trx_id_offset = 0;
		}
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_uniq = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_nullable = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_fields = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		/* The NUL byte is included in the name length. */
		ulint	len = mach_read_from_4(ptr);

		if (len > OS_FILE_MAX_PATH) {
			ib_errf(thd, IB_LOG_LEVEL_ERROR,
				ER_INNODB_INDEX_CORRUPT,
				"Index name length (%lu) is too long, "
				"the meta-data is corrupt", len);

			return(DB_CORRUPTION);
		}

		cfg_index->m_name = new(std::nothrow) byte[len];

		/* Trigger OOM */
		DBUG_EXECUTE_IF("ib_import_OOM_7",
				delete [] cfg_index->m_name;
				cfg_index->m_name = 0;);

		if (cfg_index->m_name == 0) {
			return(DB_OUT_OF_MEMORY);
		}

		dberr_t	err;

		err = row_import_cfg_read_string(file, cfg_index->m_name, len);

		if (err != DB_SUCCESS) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while parsing index name.");

			return(err);
		}

		/* Read the per-field meta-data for this index. */
		err = row_import_cfg_read_index_fields(
			file, thd, cfg_index, cfg);

		if (err != DB_SUCCESS) {
			return(err);
		}

	}

	return(DB_SUCCESS);
}
+
+/*****************************************************************//**
+Set the index root page number for v1 format.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_read_indexes(
+/*====================*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_3",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the number of indexes. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading number of indexes.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_n_indexes = mach_read_from_4(row);
+
+ if (cfg->m_n_indexes == 0) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is 0");
+
+ return(DB_CORRUPTION);
+
+ } else if (cfg->m_n_indexes > 1024) {
+ // FIXME: What is the upper limit? */
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is too high: %lu",
+ (ulong) cfg->m_n_indexes);
+ cfg->m_n_indexes = 0;
+
+ return(DB_CORRUPTION);
+ }
+
+ return(row_import_read_index_data(file, thd, cfg));
+}
+
+/*********************************************************************//**
+Read the meta data (table columns) config file. Deserialise the contents of
+dict_col_t structure, along with the column name. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_columns(
+/*====================*/
+ FILE* file, /*!< in: file to write to */
+ THD* thd, /*!< in/out: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ dict_col_t* col;
+ byte row[sizeof(ib_uint32_t) * 8];
+
+ /* FIXME: What should the upper limit be? */
+ ut_a(cfg->m_n_cols > 0);
+ ut_a(cfg->m_n_cols < 1024);
+
+ cfg->m_cols = new(std::nothrow) dict_col_t[cfg->m_n_cols];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_8",
+ delete [] cfg->m_cols; cfg->m_cols = 0;);
+
+ if (cfg->m_cols == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ cfg->m_col_names = new(std::nothrow) byte* [cfg->m_n_cols];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_9",
+ delete [] cfg->m_col_names; cfg->m_col_names = 0;);
+
+ if (cfg->m_col_names == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols);
+ memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols);
+
+ col = cfg->m_cols;
+
+ for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) {
+ byte* ptr = row;
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_4",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading table column meta-data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ col->prtype = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->mtype = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->len = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->mbminmaxlen = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->ind = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->ord_part = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->max_prefix = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ /* Read in the column name as [len, byte array]. The len
+ includes the NUL byte. */
+
+ ulint len = mach_read_from_4(ptr);
+
+ /* FIXME: What is the maximum column name length? */
+ if (len == 0 || len > 128) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_IO_READ_ERROR,
+ "Column name length %lu, is invalid",
+ (ulong) len);
+
+ return(DB_CORRUPTION);
+ }
+
+ cfg->m_col_names[i] = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_10",
+ delete [] cfg->m_col_names[i];
+ cfg->m_col_names[i] = 0;);
+
+ if (cfg->m_col_names[i] == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err;
+
+ err = row_import_cfg_read_string(
+ file, cfg->m_col_names[i], len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing table column name.");
+
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_v1(
+/*===============*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< out: meta data */
+{
+ byte value[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_5",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the hostname where the tablespace was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data export hostname length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ ulint len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_hostname = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_1",
+ delete [] cfg->m_hostname; cfg->m_hostname = 0;);
+
+ if (cfg->m_hostname == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing export hostname.");
+
+ return(err);
+ }
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_6",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the table name of tablespace that was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data table name length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_table_name = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_2",
+ delete [] cfg->m_table_name; cfg->m_table_name = 0;);
+
+ if (cfg->m_table_name == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ err = row_import_cfg_read_string(file, cfg->m_table_name, len);
+
+ if (err != DB_SUCCESS) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing table name.");
+
+ return(err);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Importing tablespace for table '%s' that was exported "
+ "from host '%s'", cfg->m_table_name, cfg->m_hostname);
+
+ byte row[sizeof(ib_uint32_t) * 3];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_7",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the autoinc value. */
+ if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading autoinc value.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_autoinc = mach_read_from_8(row);
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_8",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the tablespace page size. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data header.");
+
+ return(DB_IO_ERROR);
+ }
+
+ byte* ptr = row;
+
+ cfg->m_page_size = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ if (cfg->m_page_size != UNIV_PAGE_SIZE) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "Tablespace to be imported has a different "
+ "page size than this server. Server page size "
+ "is %lu, whereas tablespace page size is %lu",
+ UNIV_PAGE_SIZE, (ulong) cfg->m_page_size);
+
+ return(DB_ERROR);
+ }
+
+ cfg->m_flags = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg->m_n_cols = mach_read_from_4(ptr);
+
+ if (!dict_tf_is_valid(cfg->m_flags)) {
+
+ return(DB_CORRUPTION);
+
+ } else if ((err = row_import_read_columns(file, thd, cfg))
+ != DB_SUCCESS) {
+
+ return(err);
+
+ } else if ((err = row_import_read_indexes(file, thd, cfg))
+ != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ ut_a(err == DB_SUCCESS);
+ return(err);
+}
+
+/**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_meta_data(
+/*======================*/
+ dict_table_t* table, /*!< in: table */
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_9",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data version.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg.m_version = mach_read_from_4(row);
+
+ /* Check the version number. */
+ switch (cfg.m_version) {
+ case IB_EXPORT_CFG_VERSION_V1:
+
+ return(row_import_read_v1(file, thd, &cfg));
+ default:
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Unsupported meta-data version number (%lu), "
+ "file ignored", (ulong) cfg.m_version);
+ }
+
+ return(DB_ERROR);
+}
+
+/**
+Read the contents of the <tablename>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_cfg(
+/*================*/
+ dict_table_t* table, /*!< in: table */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ dberr_t err;
+ char name[OS_FILE_MAX_PATH];
+
+ cfg.m_table = table;
+
+ srv_get_meta_data_filename(table, name, sizeof(name));
+
+ FILE* file = fopen(name, "rb");
+
+ if (file == NULL) {
+ char msg[BUFSIZ];
+
+ ut_snprintf(msg, sizeof(msg),
+ "Error opening '%s', will attempt to import "
+ "without schema verification", name);
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
+ errno, strerror(errno), msg);
+
+ cfg.m_missing = true;
+
+ err = DB_FAIL;
+ } else {
+
+ cfg.m_missing = false;
+
+ err = row_import_read_meta_data(table, file, thd, cfg);
+ fclose(file);
+ }
+
+ return(err);
+}
+
/*****************************************************************//**
Update the &lt;space, root page&gt; of a table's indexes from the values
in the data dictionary.
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
row_import_update_index_root(
/*=========================*/
	trx_t*			trx,		/*!< in/out: transaction that
						covers the update */
	const dict_table_t*	table,		/*!< in: Table for which we want
						to set the root page_no */
	bool			reset,		/*!< in: if true then set to
						FIL_NULL */
	bool			dict_locked)	/*!< in: Set to true if the
						caller already owns the
						dict_sys_t:: mutex. */

{
	const dict_index_t*	index;
	que_t*			graph = 0;
	dberr_t			err = DB_SUCCESS;

	/* The parsed graph is created once and re-used (with re-bound
	literals) for every index of the table. */
	static const char	sql[] = {
		"PROCEDURE UPDATE_INDEX_ROOT() IS\n"
		"BEGIN\n"
		"UPDATE SYS_INDEXES\n"
		"SET SPACE = :space,\n"
		"    PAGE_NO = :page,\n"
		"    TYPE = :type\n"
		"WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
		"END;\n"};

	if (!dict_locked) {
		mutex_enter(&dict_sys->mutex);
	}

	/* NOTE(review): if the loop body never runs (a table with no
	indexes), graph stays 0 when passed to que_graph_free() below —
	presumably every table has at least a clustered index; confirm. */
	for (index = dict_table_get_first_index(table);
	     index != 0;
	     index = dict_table_get_next_index(index)) {

		pars_info_t*	info;
		ib_uint32_t	page;
		ib_uint32_t	space;
		ib_uint32_t	type;
		index_id_t	index_id;
		table_id_t	table_id;

		info = (graph != 0) ? graph->info : pars_info_create();

		/* Serialise the literal values in InnoDB on-disk
		(big-endian) format, as the parser binding expects. */
		mach_write_to_4(
			reinterpret_cast<byte*>(&type),
			index->type);

		mach_write_to_4(
			reinterpret_cast<byte*>(&page),
			reset ? FIL_NULL : index->page);

		mach_write_to_4(
			reinterpret_cast<byte*>(&space),
			reset ? FIL_NULL : index->space);

		mach_write_to_8(
			reinterpret_cast<byte*>(&index_id),
			index->id);

		mach_write_to_8(
			reinterpret_cast<byte*>(&table_id),
			table->id);

		/* If we set the corrupt bit during the IMPORT phase then
		we need to update the system tables. */
		pars_info_bind_int4_literal(info, "type", &type);
		pars_info_bind_int4_literal(info, "space", &space);
		pars_info_bind_int4_literal(info, "page", &page);
		pars_info_bind_ull_literal(info, "index_id", &index_id);
		pars_info_bind_ull_literal(info, "table_id", &table_id);

		if (graph == 0) {
			graph = pars_sql(info, sql);
			ut_a(graph);
			graph->trx = trx;
		}

		que_thr_t*	thr;

		graph->fork_type = QUE_FORK_MYSQL_INTERFACE;

		ut_a(thr = que_fork_start_command(graph));

		que_run_threads(thr);

		DBUG_EXECUTE_IF("ib_import_internal_error",
				trx->error_state = DB_ERROR;);

		err = trx->error_state;

		if (err != DB_SUCCESS) {
			char	index_name[MAX_FULL_NAME_LEN + 1];

			innobase_format_name(
				index_name, sizeof(index_name),
				index->name, TRUE);

			ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
				ER_INTERNAL_ERROR,
				"While updating the <space, root page "
				"number> of index %s - %s",
				index_name, ut_strerr(err));

			break;
		}
	}

	que_graph_free(graph);

	if (!dict_locked) {
		mutex_exit(&dict_sys->mutex);
	}

	return(err);
}
+
/** Callback arg for row_import_set_discarded: carries the new state in
and the updated MIX_LEN value (serialised with mach_write_to_4) out. */
struct discard_t {
	ib_uint32_t	flags2;			/*!< Value read from column */
	bool		state;			/*!< New state of the flag */
	ulint		n_recs;			/*!< Number of recs processed */
};
+
+/******************************************************************//**
+Fetch callback that sets or unsets the DISCARDED tablespace flag in
+SYS_TABLES. The flags is stored in MIX_LEN column. Invoked by the InnoDB
+internal SQL interpreter for each row fetched by the cursor in
+row_import_update_discarded_flag(); the new value is stored back into
+discard_t::flags2 for the caller's subsequent UPDATE.
+@return FALSE if all OK */
+static
+ibool
+row_import_set_discarded(
+/*=====================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: bool set/unset flag */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ discard_t* discard = static_cast<discard_t*>(user_arg);
+ /* The SELECT list contains a single column: MIX_LEN. */
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ /* MIX_LEN must be a 4-byte INT column; anything else would
+ indicate data dictionary corruption. */
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == sizeof(ib_uint32_t));
+
+ ulint flags2 = mach_read_from_4(
+ static_cast<byte*>(dfield_get_data(dfield)));
+
+ if (discard->state) {
+ flags2 |= DICT_TF2_DISCARDED;
+ } else {
+ flags2 &= ~DICT_TF2_DISCARDED;
+ }
+
+ /* Write the new value through mach_write_to_4() so that flags2 is
+ stored in InnoDB byte order, as expected when the caller binds it
+ via pars_info_bind_int4_literal(). */
+ mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
+
+ ++discard->n_recs;
+
+ /* There should be at most one matching record. */
+ ut_a(discard->n_recs == 1);
+
+ return(FALSE);
+}
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES. Reads the current
+MIX_LEN value via a cursor (row_import_set_discarded() flips the bit),
+then writes the new value back with an UPDATE in the same procedure.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ table_id_t table_id, /*!< in: Table for which we want
+ to set the root table->flags2 */
+ bool discarded, /*!< in: set MIX_LEN column bit
+ to discarded, if true */
+ bool dict_locked) /*!< in: set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+
+{
+ pars_info_t* info;
+ discard_t discard;
+
+ static const char sql[] =
+ "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS\n"
+ " SELECT MIX_LEN "
+ " FROM SYS_TABLES "
+ " WHERE ID = :table_id FOR UPDATE;"
+ "\n"
+ "BEGIN\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "UPDATE SYS_TABLES"
+ " SET MIX_LEN = :flags2"
+ " WHERE ID = :table_id;\n"
+ "CLOSE c;\n"
+ "END;\n";
+
+ /* ULINT32_UNDEFINED marks flags2 as "not yet fetched"; the
+ callback must overwrite it for the assertion below to hold. */
+ discard.n_recs = 0;
+ discard.state = discarded;
+ discard.flags2 = ULINT32_UNDEFINED;
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "table_id", table_id);
+ /* Bound by address: the FETCH callback updates discard.flags2
+ before the UPDATE statement reads :flags2. */
+ pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
+
+ pars_info_bind_function(
+ info, "my_func", row_import_set_discarded, &discard);
+
+ dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
+
+ /* NOTE(review): these assertions run even when que_eval_sql()
+ returned an error, in which case the callback may never have
+ fired (n_recs == 0, flags2 still ULINT32_UNDEFINED) and the
+ server would abort here instead of propagating err. Confirm
+ that every failure path is impossible at this point, otherwise
+ check err before asserting. */
+ ut_a(discard.n_recs == 1);
+ ut_a(discard.flags2 != ULINT32_UNDEFINED);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary. Driver for ALTER TABLE ... IMPORT
+TABLESPACE: reads the .cfg meta-data (or discovers root pages from the
+.ibd when the .cfg is missing), converts all pages, opens the tablespace,
+purges leftover delete-marked records, and finally updates SYS_INDEXES /
+SYS_TABLES to make the import durable.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+{
+ dberr_t err;
+ trx_t* trx;
+ ib_uint64_t autoinc = 0;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ char* filepath = NULL;
+
+ ut_ad(!srv_read_only_mode);
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ /* IMPORT is only legal on a table whose tablespace was
+ previously discarded (ibd_file_missing set). */
+ ut_a(table->space);
+ ut_ad(prebuilt->trx);
+ ut_a(table->ibd_file_missing);
+
+ trx_start_if_not_started(prebuilt->trx);
+
+ /* A dedicated transaction is used for the import so that its
+ dict-operation state and undo log are independent of the user
+ transaction. */
+ trx = trx_allocate_for_mysql();
+
+ /* So that the table is not DROPped during recovery. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ trx_start_if_not_started(trx);
+
+ /* So that we can send error messages to the user. */
+ trx->mysql_thd = prebuilt->trx->mysql_thd;
+
+ /* Ensure that the table will be dropped by trx_rollback_active()
+ in case of a crash. */
+
+ trx->table_id = table->id;
+
+ /* Assign an undo segment for the transaction, so that the
+ transaction will be recovered after a crash. */
+
+ mutex_enter(&trx->undo_mutex);
+
+ err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+ mutex_exit(&trx->undo_mutex);
+
+ DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ if (err != DB_SUCCESS) {
+
+ return(row_import_cleanup(prebuilt, trx, err));
+
+ } else if (trx->update_undo == 0) {
+
+ /* trx_undo_assign_undo() can report success yet leave no
+ update-undo segment when none is available; treat that the
+ same as running out of transactions. */
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ prebuilt->trx->op_info = "read meta-data file";
+
+ /* Prevent DDL operations while we are checking. */
+
+ rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+
+ row_import cfg;
+
+ memset(&cfg, 0x0, sizeof(cfg));
+
+ err = row_import_read_cfg(table, trx->mysql_thd, cfg);
+
+ /* Check if the table column definitions match the contents
+ of the config file. */
+
+ if (err == DB_SUCCESS) {
+
+ /* We have a schema file, try and match it with the our
+ data dictionary. */
+
+ err = cfg.match_schema(trx->mysql_thd);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO to match the
+ B-tree root page numbers in the tablespace. Use the index
+ name from the .cfg file to find match. */
+
+ if (err == DB_SUCCESS) {
+ cfg.set_root_by_name();
+ autoinc = cfg.m_autoinc;
+ }
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ } else if (cfg.m_missing) {
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ /* We don't have a schema file, we will have to discover
+ the index root pages from the .ibd file and skip the schema
+ matching step. */
+
+ ut_a(err == DB_FAIL);
+
+ cfg.m_page_size = UNIV_PAGE_SIZE;
+
+ FetchIndexRootPages fetchIndexRootPages(table, trx);
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(cfg.m_page_size),
+ fetchIndexRootPages);
+
+ if (err == DB_SUCCESS) {
+
+ err = fetchIndexRootPages.build_row_import(&cfg);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO
+ to match the B-tree root page numbers in the
+ tablespace. */
+
+ if (err == DB_SUCCESS) {
+ err = cfg.set_root_by_heuristic();
+ }
+ }
+
+ } else {
+ /* .cfg exists but could not be read/parsed; err from
+ row_import_read_cfg() is reported below. */
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+ }
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ prebuilt->trx->op_info = "importing tablespace";
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase I - Update all pages");
+
+ /* Iterate over all the pages and do the sanity checking and
+ the conversion required to import the tablespace. */
+
+ PageConverter converter(&cfg, trx);
+
+ /* Set the IO buffer size in pages. */
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(cfg.m_page_size), converter);
+
+ DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ if (err != DB_SUCCESS) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_INTERNAL_ERROR,
+ "Cannot reset LSNs in table '%s' : %s",
+ table_name, ut_strerr(err));
+
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* If the table is stored in a remote tablespace, we need to
+ determine that filepath from the link file and system tables.
+ Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+ ut_a(filepath);
+
+ /* Open the tablespace so that we can access via the buffer pool.
+ We set the 2nd param (fix_dict = true) here because we already
+ have an x-lock on dict_operation_lock and dict_sys->mutex. */
+
+ err = fil_open_single_table_tablespace(
+ true, true, table->space,
+ dict_tf_to_fsp_flags(table->flags),
+ table->name, filepath);
+
+ DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
+ err = DB_TABLESPACE_NOT_FOUND;);
+
+ if (err != DB_SUCCESS) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_FILE_NOT_FOUND,
+ filepath, err, ut_strerr(err));
+
+ mem_free(filepath);
+
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ mem_free(filepath);
+
+ /* Check that the change-buffer bitmap pages in the imported
+ file are consistent. */
+ err = ibuf_check_bitmap_on_import(trx, table->space);
+
+ DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ /* The first index must always be the clustered index. */
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ if (!dict_index_is_clust(index)) {
+ return(row_import_error(prebuilt, trx, DB_CORRUPTION));
+ }
+
+ /* Update the Btree segment headers for index node and
+ leaf nodes in the root page. Set the new space id. */
+
+ err = btr_root_adjust_on_import(index);
+
+ DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* NOTE(review): this second err check is unreachable -- the
+ identical check above has already returned on any error, so
+ only the else-if branch below can ever execute here. */
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ } else if (cfg.requires_purge(index->name)) {
+
+ /* Purge any delete-marked records that couldn't be
+ purged during the page conversion phase from the
+ cluster index. */
+
+ IndexPurge purge(trx, index);
+
+ trx->op_info = "cluster: purging delete marked records";
+
+ err = purge.garbage_collect();
+
+ trx->op_info = "";
+ }
+
+ DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* For secondary indexes, purge any records that couldn't be purged
+ during the page conversion phase. */
+
+ err = row_import_adjust_root_pages_of_secondary_indexes(
+ prebuilt, trx, table, cfg);
+
+ DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* Ensure that the next available DB_ROW_ID is not smaller than
+ any DB_ROW_ID stored in the table. */
+
+ if (prebuilt->clust_index_was_generated) {
+
+ err = row_import_set_sys_max_row_id(prebuilt, table);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush changes to disk");
+
+ /* Ensure that all pages dirtied during the IMPORT make it to disk.
+ The only dirty pages generated should be from the pessimistic purge
+ of delete marked records that couldn't be purged in Phase I. */
+
+ buf_LRU_flush_or_remove_pages(
+ prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx);
+
+ if (trx_is_interrupted(trx)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");
+ return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase IV - Flush complete");
+ }
+
+ /* The dictionary latches will be released in in row_import_cleanup()
+ after the transaction commit, for both success and error. */
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Update the root pages of the table's indexes. */
+ err = row_import_update_index_root(trx, table, false, true);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* Update the table's discarded flag, unset it. */
+ err = row_import_update_discarded_flag(trx, table->id, false, true);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* The tablespace is now fully attached; clear the in-memory
+ discarded state to match the dictionary update above. */
+ table->ibd_file_missing = false;
+ table->flags2 &= ~DICT_TF2_DISCARDED;
+
+ if (autoinc != 0) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_logf(IB_LOG_LEVEL_INFO, "%s autoinc value set to " IB_ID_FMT,
+ table_name, autoinc);
+
+ /* Restore the auto-increment counter recorded in the
+ .cfg file (autoinc stays 0 when no .cfg was present). */
+ dict_table_autoinc_lock(table);
+ dict_table_autoinc_initialize(table, autoinc);
+ dict_table_autoinc_unlock(table);
+ }
+
+ ut_a(err == DB_SUCCESS);
+
+ return(row_import_cleanup(prebuilt, trx, err));
+}
+
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index e79518e24de..c1c27152831 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -23,11 +23,8 @@ Insert into a table
Created 4/20/1996 Heikki Tuuri
*******************************************************/
-#include "m_string.h" /* for my_sys.h */
#include "row0ins.h"
-#define DEBUG_SYNC_C_IF_THD(A,B) DEBUG_SYNC(A,B)
-
#ifdef UNIV_NONINL
#include "row0ins.ic"
#endif
@@ -35,6 +32,7 @@ Created 4/20/1996 Heikki Tuuri
#include "ha_prototypes.h"
#include "dict0dict.h"
#include "dict0boot.h"
+#include "trx0rec.h"
#include "trx0undo.h"
#include "btr0btr.h"
#include "btr0cur.h"
@@ -43,6 +41,7 @@ Created 4/20/1996 Heikki Tuuri
#include "row0upd.h"
#include "row0sel.h"
#include "row0row.h"
+#include "row0log.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
@@ -52,6 +51,7 @@ Created 4/20/1996 Heikki Tuuri
#include "buf0lru.h"
#include "fts0fts.h"
#include "fts0types.h"
+#include "m_string.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -101,7 +101,7 @@ ins_node_create(
/***********************************************************//**
Creates an entry template for each index of a table. */
-UNIV_INTERN
+static
void
ins_node_create_entry_list(
/*=======================*/
@@ -222,68 +222,92 @@ Does an insert operation by updating a delete-marked existing record
in the index. This situation can occur if the delete-marked record is
kept in the index for consistent reads.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_sec_index_entry_by_modify(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
+ ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
const dtuple_t* entry, /*!< in: index entry to insert */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
big_rec_t* dummy_big_rec;
- mem_heap_t* heap;
upd_t* update;
rec_t* rec;
- ulint err;
+ dberr_t err;
rec = btr_cur_get_rec(cursor);
ut_ad(!dict_index_is_clust(cursor->index));
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
+ ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
+ ut_ad(!entry->info_bits);
/* We know that in the alphabetical ordering, entry and rec are
identified. But in their binary form there may be differences if
there are char fields in them. Therefore we have to calculate the
difference. */
- heap = mem_heap_create(1024);
-
update = row_upd_build_sec_rec_difference_binary(
- cursor->index, entry, rec, thr_get_trx(thr), heap);
+ rec, cursor->index, *offsets, entry, heap);
+
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
+ /* We should never insert in place of a record that
+ has not been delete-marked. The only exception is when
+ online CREATE INDEX copied the changes that we already
+ made to the clustered index, and completed the
+ secondary index creation before we got here. In this
+ case, the change would already be there. The CREATE
+ INDEX should be waiting for a MySQL meta-data lock
+ upgrade at least until this INSERT or UPDATE
+ returns. After that point, the TEMP_INDEX_PREFIX
+ would be dropped from the index name in
+ commit_inplace_alter_table(). */
+ ut_a(update->n_fields == 0);
+ ut_a(*cursor->index->name == TEMP_INDEX_PREFIX);
+ ut_ad(!dict_index_is_online_ddl(cursor->index));
+ return(DB_SUCCESS);
+ }
+
if (mode == BTR_MODIFY_LEAF) {
/* Try an optimistic updating of the record, keeping changes
within the page */
- err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- update, 0, thr, mtr);
+ /* TODO: pass only *offsets */
+ err = btr_cur_optimistic_update(
+ flags | BTR_KEEP_SYS_FLAG, cursor,
+ offsets, &offsets_heap, update, 0, thr,
+ thr_get_trx(thr)->id, mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
- err = DB_LOCK_TABLE_FULL;
-
- goto func_exit;
+ return(DB_LOCK_TABLE_FULL);
}
- err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- &heap, &dummy_big_rec, update,
- 0, thr, mtr);
+ err = btr_cur_pessimistic_update(
+ flags | BTR_KEEP_SYS_FLAG, cursor,
+ offsets, &offsets_heap,
+ heap, &dummy_big_rec, update, 0,
+ thr, thr_get_trx(thr)->id, mtr);
ut_ad(!dummy_big_rec);
}
-func_exit:
- mem_heap_free(heap);
return(err);
}
@@ -293,15 +317,20 @@ Does an insert operation by delete unmarking and updating a delete marked
existing record in the index. This situation can occur if the delete marked
record is kept in the index for consistent reads.
@return DB_SUCCESS, DB_FAIL, or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_clust_index_entry_by_modify(
/*================================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap that can
+ be emptied, or NULL */
+ mem_heap_t* heap, /*!< in/out: memory heap */
big_rec_t** big_rec,/*!< out: possible big rec vector of fields
which have to be stored externally by the
caller */
@@ -310,9 +339,9 @@ row_ins_clust_index_entry_by_modify(
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
- rec_t* rec;
- upd_t* update;
- ulint err;
+ const rec_t* rec;
+ const upd_t* update;
+ dberr_t err;
ut_ad(dict_index_is_clust(cursor->index));
@@ -323,38 +352,40 @@ row_ins_clust_index_entry_by_modify(
ut_ad(rec_get_deleted_flag(rec,
dict_table_is_comp(cursor->index->table)));
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
-
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may NOT contain system columns trx_id or
roll_ptr */
- update = row_upd_build_difference_binary(cursor->index, entry, rec,
- thr_get_trx(thr), *heap);
- if (mode == BTR_MODIFY_LEAF) {
+ update = row_upd_build_difference_binary(
+ cursor->index, entry, rec, NULL, true,
+ thr_get_trx(thr), heap);
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
+
/* Try optimistic updating of the record, keeping changes
within the page */
- err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
- mtr);
+ err = btr_cur_optimistic_update(
+ flags, cursor, offsets, offsets_heap, update, 0, thr,
+ thr_get_trx(thr)->id, mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
- ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
return(DB_LOCK_TABLE_FULL);
}
err = btr_cur_pessimistic_update(
- BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update,
- 0, thr, mtr);
+ flags | BTR_KEEP_POS_FLAG,
+ cursor, offsets, offsets_heap, heap,
+ big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr);
}
return(err);
@@ -394,7 +425,7 @@ row_ins_cascade_ancestor_updates_table(
Returns the number of ancestor UPDATE or DELETE nodes of a
cascaded update/delete node.
@return number of ancestors */
-static
+static __attribute__((nonnull, warn_unused_result))
ulint
row_ins_cascade_n_ancestors(
/*========================*/
@@ -420,7 +451,7 @@ a cascaded update.
can also be 0 if no foreign key fields changed; the returned value is
ULINT_UNDEFINED if the column type in the child table is too short to
fit the new value in the parent table: that means the update fails */
-static
+static __attribute__((nonnull, warn_unused_result))
ulint
row_ins_cascade_calc_update_vec(
/*============================*/
@@ -691,6 +722,8 @@ row_ins_set_detailed(
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign) /*!< in: foreign key constraint */
{
+ ut_ad(!srv_read_only_mode);
+
mutex_enter(&srv_misc_tmpfile_mutex);
rewind(srv_misc_tmpfile);
@@ -717,13 +750,17 @@ row_ins_foreign_trx_print(
/*======================*/
trx_t* trx) /*!< in: transaction */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
+ ulint n_rec_locks;
+ ulint n_trx_locks;
ulint heap_size;
+ if (srv_read_only_mode) {
+ return;
+ }
+
lock_mutex_enter();
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
@@ -735,7 +772,7 @@ row_ins_foreign_trx_print(
fputs(" Transaction:\n", dict_foreign_err_file);
trx_print_low(dict_foreign_err_file, trx, 600,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
mutex_exit(&trx_sys->mutex);
@@ -759,6 +796,10 @@ row_ins_foreign_report_err(
const dtuple_t* entry) /*!< in: index entry in the parent
table */
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
FILE* ef = dict_foreign_err_file;
trx_t* trx = thr_get_trx(thr);
@@ -810,6 +851,10 @@ row_ins_foreign_report_add_err(
const dtuple_t* entry) /*!< in: index entry to insert in the
child table */
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
FILE* ef = dict_foreign_err_file;
row_ins_set_detailed(trx, foreign);
@@ -879,8 +924,8 @@ Perform referential actions or checks when a parent row is deleted or updated
and the constraint had an ON DELETE or ON UPDATE condition which was not
RESTRICT.
@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_foreign_check_on_constraint(
/*================================*/
que_thr_t* thr, /*!< in: query thread whose run_node
@@ -906,7 +951,7 @@ row_ins_foreign_check_on_constraint(
const buf_block_t* clust_block;
upd_t* update;
ulint n_to_update;
- ulint err;
+ dberr_t err;
ulint i;
trx_t* trx;
mem_heap_t* tmp_heap = NULL;
@@ -1242,6 +1287,9 @@ row_ins_foreign_check_on_constraint(
release the latch. */
row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
+
+ DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
+
row_mysql_freeze_data_dictionary(thr_get_trx(thr));
mtr_start(mtr);
@@ -1284,7 +1332,7 @@ Sets a shared lock on a record. Used in locking possible duplicate key
records and also in checking foreign key constraints.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
-enum db_err
+dberr_t
row_ins_set_shared_rec_lock(
/*========================*/
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
@@ -1295,7 +1343,7 @@ row_ins_set_shared_rec_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
+ dberr_t err;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -1315,7 +1363,7 @@ Sets a exclusive lock on a record. Used in locking possible duplicate key
records
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
-enum db_err
+dberr_t
row_ins_set_exclusive_rec_lock(
/*===========================*/
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
@@ -1326,7 +1374,7 @@ row_ins_set_exclusive_rec_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
+ dberr_t err;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -1347,7 +1395,7 @@ which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_operation_lock.
@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
UNIV_INTERN
-ulint
+dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
ibool check_ref,/*!< in: TRUE if we want to check that
@@ -1361,7 +1409,7 @@ row_ins_check_foreign_constraint(
dtuple_t* entry, /*!< in: index entry for index */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
upd_node_t* upd_node;
dict_table_t* check_table;
dict_index_t* check_index;
@@ -1433,9 +1481,11 @@ run_again:
check_index = foreign->foreign_index;
}
- if (check_table == NULL || check_table->ibd_file_missing
+ if (check_table == NULL
+ || check_table->ibd_file_missing
|| check_index == NULL) {
- if (check_ref) {
+
+ if (!srv_read_only_mode && check_ref) {
FILE* ef = dict_foreign_err_file;
row_ins_set_detailed(trx, foreign);
@@ -1611,6 +1661,8 @@ run_again:
} else {
err = DB_SUCCESS;
}
+ default:
+ break;
}
goto end_scan;
@@ -1635,18 +1687,43 @@ end_scan:
do_possible_lock_wait:
if (err == DB_LOCK_WAIT) {
- trx->error_state = static_cast<enum db_err>(err);
+ bool verified = false;
+
+ trx->error_state = err;
que_thr_stop_for_mysql(thr);
lock_wait_suspend_thread(thr);
- if (trx->error_state == DB_SUCCESS) {
+ if (check_table->to_be_dropped) {
+ /* The table is being dropped. We shall timeout
+ this operation */
+ err = DB_LOCK_WAIT_TIMEOUT;
+ goto exit_func;
+ }
- goto run_again;
+ /* We had temporarily released dict_operation_lock in
+ above lock sleep wait, now we have the lock again, and
+ we will need to re-check whether the foreign key has been
+ dropped */
+ for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST(
+ table->referenced_list);
+ check_foreign;
+ check_foreign = UT_LIST_GET_NEXT(
+ referenced_list, check_foreign)) {
+ if (check_foreign == foreign) {
+ verified = true;
+ break;
+ }
}
- err = trx->error_state;
+ if (!verified) {
+ err = DB_DICT_CHANGED;
+ } else if (trx->error_state == DB_SUCCESS) {
+ goto run_again;
+ } else {
+ err = trx->error_state;
+ }
}
exit_func:
@@ -1663,8 +1740,8 @@ Otherwise does searches to the indexes of referenced tables and
sets shared locks which lock either the success or the failure of
a constraint.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_check_foreign_constraints(
/*==============================*/
dict_table_t* table, /*!< in: table */
@@ -1673,7 +1750,7 @@ row_ins_check_foreign_constraints(
que_thr_t* thr) /*!< in: query thread */
{
dict_foreign_t* foreign;
- ulint err;
+ dberr_t err;
trx_t* trx;
ibool got_s_lock = FALSE;
@@ -1681,14 +1758,21 @@ row_ins_check_foreign_constraints(
foreign = UT_LIST_GET_FIRST(table->foreign_list);
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "foreign_constraint_check_for_ins");
+
while (foreign) {
if (foreign->foreign_index == index) {
dict_table_t* ref_table = NULL;
+ dict_table_t* foreign_table = foreign->foreign_table;
+ dict_table_t* referenced_table
+ = foreign->referenced_table;
- if (foreign->referenced_table == NULL) {
+ if (referenced_table == NULL) {
ref_table = dict_table_open_on_name(
- foreign->referenced_table_name_lookup, FALSE);
+ foreign->referenced_table_name_lookup,
+ FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
if (0 == trx->dict_operation_lock_mode) {
@@ -1697,9 +1781,9 @@ row_ins_check_foreign_constraints(
row_mysql_freeze_data_dictionary(trx);
}
- if (foreign->referenced_table) {
+ if (referenced_table) {
os_inc_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -1711,9 +1795,12 @@ row_ins_check_foreign_constraints(
err = row_ins_check_foreign_constraint(
TRUE, foreign, table, entry, thr);
- if (foreign->referenced_table) {
+ DBUG_EXECUTE_IF("row_ins_dict_change_err",
+ err = DB_DICT_CHANGED;);
+
+ if (referenced_table) {
os_dec_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -1722,7 +1809,7 @@ row_ins_check_foreign_constraints(
}
if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE);
+ dict_table_close(ref_table, FALSE, FALSE);
}
if (err != DB_SUCCESS) {
@@ -1778,8 +1865,7 @@ row_ins_dupl_error_with_rec(
if (!dict_index_is_clust(index)) {
for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
+ if (dfield_is_null(dtuple_get_nth_field(entry, i))) {
return(FALSE);
}
@@ -1794,26 +1880,30 @@ Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
Set shared locks on possible duplicate records.
@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_scan_sec_index_for_duplicate(
/*=================================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: non-clustered unique index */
dtuple_t* entry, /*!< in: index entry */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
+ bool s_latch,/*!< in: whether index->lock is being held */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mem_heap_t* offsets_heap)
+ /*!< in/out: memory heap that can be emptied */
{
ulint n_unique;
- ulint i;
int cmp;
ulint n_fields_cmp;
btr_pcur_t pcur;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ulint allow_duplicates;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
+ ulint* offsets = NULL;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
n_unique = dict_index_get_n_unique(index);
@@ -1821,7 +1911,7 @@ row_ins_scan_sec_index_for_duplicate(
n_unique first fields is NULL, a unique key violation cannot occur,
since we define NULL != NULL in this case */
- for (i = 0; i < n_unique; i++) {
+ for (ulint i = 0; i < n_unique; i++) {
if (UNIV_SQL_NULL == dfield_get_len(
dtuple_get_nth_field(entry, i))) {
@@ -1829,15 +1919,17 @@ row_ins_scan_sec_index_for_duplicate(
}
}
- mtr_start(&mtr);
-
/* Store old value on n_fields_cmp */
n_fields_cmp = dtuple_get_n_fields_cmp(entry);
- dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
+ dtuple_set_n_fields_cmp(entry, n_unique);
- btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
+ btr_pcur_open(index, entry, PAGE_CUR_GE,
+ s_latch
+ ? BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_SEARCH_LEAF,
+ &pcur, mtr);
allow_duplicates = thr_get_trx(thr)->duplicates;
@@ -1853,9 +1945,12 @@ row_ins_scan_sec_index_for_duplicate(
}
offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
+ ULINT_UNDEFINED, &offsets_heap);
- if (allow_duplicates) {
+ if (flags & BTR_NO_LOCKING_FLAG) {
+ /* Set no locks when applying log
+ in online table rebuild. */
+ } else if (allow_duplicates) {
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1901,37 +1996,115 @@ row_ins_scan_sec_index_for_duplicate(
ut_a(cmp < 0);
goto end_scan;
}
- } while (btr_pcur_move_to_next(&pcur, &mtr));
+ } while (btr_pcur_move_to_next(&pcur, mtr));
end_scan:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- mtr_commit(&mtr);
-
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
return(err);
}
+/** Checks for a duplicate when the table is being rebuilt online.
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+a newer version of entry (the entry should not be inserted)
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_ins_duplicate_online(
+/*=====================*/
+ ulint n_uniq, /*!< in: offset of DB_TRX_ID */
+ const dtuple_t* entry, /*!< in: entry that is being inserted */
+ const rec_t* rec, /*!< in: clustered index record */
+ ulint* offsets)/*!< in/out: rec_get_offsets(rec) */
+{
+ ulint fields = 0;
+ ulint bytes = 0;
+
+ /* During rebuild, there should not be any delete-marked rows
+ in the new table. */
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
+ ut_ad(dtuple_get_n_fields_cmp(entry) == n_uniq);
+
+ /* Compare the PRIMARY KEY fields and the
+ DB_TRX_ID, DB_ROLL_PTR. */
+ cmp_dtuple_rec_with_match_low(
+ entry, rec, offsets, n_uniq + 2, &fields, &bytes);
+
+ if (fields < n_uniq) {
+ /* Not a duplicate. */
+ return(DB_SUCCESS);
+ }
+
+ if (fields == n_uniq + 2) {
+ /* rec is an exact match of entry. */
+ ut_ad(bytes == 0);
+ return(DB_SUCCESS_LOCKED_REC);
+ }
+
+ return(DB_DUPLICATE_KEY);
+}
+
+/** Checks for a duplicate at the insert position when the table is being rebuilt online.
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+a newer version of entry (the entry should not be inserted)
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_ins_duplicate_error_in_clust_online(
+/*====================================*/
+ ulint n_uniq, /*!< in: offset of DB_TRX_ID */
+ const dtuple_t* entry, /*!< in: entry that is being inserted */
+ const btr_cur_t*cursor, /*!< in: cursor on insert position */
+ ulint** offsets,/*!< in/out: rec_get_offsets(rec) */
+ mem_heap_t** heap) /*!< in/out: heap for offsets */
+{
+ dberr_t err = DB_SUCCESS;
+ const rec_t* rec = btr_cur_get_rec(cursor);
+
+ if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) {
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
+ err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ rec = page_rec_get_next_const(btr_cur_get_rec(cursor));
+
+ if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) {
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
+ err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
+ }
+
+ return(err);
+}
+
/***************************************************************//**
Checks if a unique key violation error would occur at an index entry
insert. Sets shared locks on possible duplicate records. Works only
for a clustered index!
-@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error,
-DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
-record */
-static
-ulint
+@retval DB_SUCCESS if no error
+@retval DB_DUPLICATE_KEY if a duplicate was found
+@retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
+record
+@retval DB_SUCCESS_LOCKED_REC if an exact match of the record was found
+in online table rebuild (flags & (BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG)) */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_duplicate_error_in_clust(
/*=============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: B-tree cursor */
const dtuple_t* entry, /*!< in: entry to insert */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
rec_t* rec;
ulint n_unique;
trx_t* trx = thr_get_trx(thr);
@@ -1942,8 +2115,7 @@ row_ins_duplicate_error_in_clust(
UT_NOT_USED(mtr);
- ut_a(dict_index_is_clust(cursor->index));
- ut_ad(dict_index_is_unique(cursor->index));
+ ut_ad(dict_index_is_clust(cursor->index));
/* NOTE: For unique non-clustered indexes there may be any number
of delete marked records with the same value for the non-clustered
@@ -2002,6 +2174,7 @@ row_ins_duplicate_error_in_clust(
if (row_ins_dupl_error_with_rec(
rec, entry, cursor->index, offsets)) {
+duplicate:
trx->error_info = cursor->index;
err = DB_DUPLICATE_KEY;
goto func_exit;
@@ -2046,14 +2219,12 @@ row_ins_duplicate_error_in_clust(
if (row_ins_dupl_error_with_rec(
rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
+ goto duplicate;
}
}
- ut_a(!dict_index_is_clust(cursor->index));
/* This should never happen */
+ ut_error;
}
err = DB_SUCCESS;
@@ -2081,12 +2252,12 @@ row_ins_must_modify_rec(
/*====================*/
const btr_cur_t* cursor) /*!< in: B-tree cursor */
{
- /* NOTE: (compare to the note in row_ins_duplicate_error) Because node
- pointers on upper levels of the B-tree may match more to entry than
- to actual user records on the leaf level, we have to check if the
- candidate record is actually a user record. In a clustered index
- node pointers contain index->n_unique first fields, and in the case
- of a secondary index, all fields of the index. */
+ /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust)
+ Because node pointers on upper levels of the B-tree may match more
+ to entry than to actual user records on the leaf level, we
+ have to check if the candidate record is actually a user record.
+ A clustered index node pointer contains index->n_unique first fields,
+ and a secondary index node pointer contains all index fields. */
return(cursor->low_match
>= dict_index_get_n_unique_in_tree(cursor->index)
@@ -2094,56 +2265,359 @@ row_ins_must_modify_rec(
}
/***************************************************************//**
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed,
-or error code */
-static
-ulint
-row_ins_index_entry_low(
-/*====================*/
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, the other
+record is necessarily marked deleted by a committed transaction, or a
+unique key violation error occurs. The delete marked record is then
+updated to an existing record, and we must write an undo log record on
+the delete marked record.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: index */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint n_uniq, /*!< in: 0 or index->n_uniq */
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr) /*!< in: query thread */
{
btr_cur_t cursor;
- ulint search_mode;
- ibool modify = FALSE;
- rec_t* insert_rec;
- rec_t* rec;
- ulint* offsets;
- ulint err;
- ulint n_unique;
- big_rec_t* big_rec = NULL;
+ ulint* offsets = NULL;
+ dberr_t err;
+ big_rec_t* big_rec = NULL;
mtr_t mtr;
- mem_heap_t* heap = NULL;
+ mem_heap_t* offsets_heap = NULL;
- log_free_check();
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(!dict_index_is_unique(index)
+ || n_uniq == dict_index_get_n_unique(index));
+ ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
mtr_start(&mtr);
+ if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
cursor.thr = thr;
/* Note that we use PAGE_CUR_LE as the search mode, because then
the function will return in both low_match and up_match of the
cursor sensible values */
- if (dict_index_is_clust(index)) {
- search_mode = mode;
- } else if (!(thr_get_trx(thr)->check_unique_secondary)) {
- search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE;
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+#ifdef UNIV_DEBUG
+ {
+ page_t* page = btr_cur_get_page(&cursor);
+ rec_t* first_rec = page_rec_get_next(
+ page_get_infimum_rec(page));
+
+ ut_ad(page_rec_is_supremum(first_rec)
+ || rec_get_n_fields(first_rec, index)
+ == dtuple_get_n_fields(entry));
+ }
+#endif
+
+ if (n_uniq && (cursor.up_match >= n_uniq
+ || cursor.low_match >= n_uniq)) {
+
+ if (flags
+ == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) {
+ /* Set no locks when applying log
+ in online table rebuild. Only check for duplicates. */
+ err = row_ins_duplicate_error_in_clust_online(
+ n_uniq, entry, &cursor,
+ &offsets, &offsets_heap);
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ default:
+ ut_ad(0);
+ /* fall through */
+ case DB_SUCCESS_LOCKED_REC:
+ case DB_DUPLICATE_KEY:
+ thr_get_trx(thr)->error_info = cursor.index;
+ }
+ } else {
+ /* Note that the following may return also
+ DB_LOCK_WAIT */
+
+ err = row_ins_duplicate_error_in_clust(
+ flags, &cursor, entry, thr, &mtr);
+ }
+
+ if (err != DB_SUCCESS) {
+err_exit:
+ mtr_commit(&mtr);
+ goto func_exit;
+ }
+ }
+
+ if (row_ins_must_modify_rec(&cursor)) {
+ /* There is already an index entry with a long enough common
+ prefix, we must convert the insert into a modify of an
+ existing record */
+ mem_heap_t* entry_heap = mem_heap_create(1024);
+
+ err = row_ins_clust_index_entry_by_modify(
+ flags, mode, &cursor, &offsets, &offsets_heap,
+ entry_heap, &big_rec, entry, thr, &mtr);
+
+ rec_t* rec = btr_cur_get_rec(&cursor);
+
+ if (big_rec) {
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored
+ columns while still x-latching
+ index->lock and block->lock. Allocate
+ pages for big_rec in the mtr that
+ modified the B-tree, but be sure to skip
+ any pages that were freed in mtr. We will
+ write out the big_rec pages before
+ committing the B-tree mini-transaction. If
+ the system crashes so that crash recovery
+ will not replay the mtr_commit(&mtr), the
+ big_rec pages will be left orphaned until
+ the pages are allocated for something else.
+
+ TODO: If the allocation extends the
+ tablespace, it will not be redo
+ logged, in either mini-transaction.
+ Tablespace extension should be
+ redo-logged in the big_rec
+ mini-transaction, so that recovery
+ will not fail when the big_rec was
+ written to the extended portion of the
+ file, in case the file was somehow
+ truncated in the crash. */
+
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "before_row_ins_upd_extern");
+ err = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, big_rec, &mtr,
+ BTR_STORE_INSERT_UPDATE);
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_upd_extern");
+ /* If writing big_rec fails (for
+ example, because of DB_OUT_OF_FILE_SPACE),
+ the record will be corrupted. Even if
+ we did not update any externally
+ stored columns, our update could cause
+ the record to grow so that a
+ non-updated column was selected for
+ external storage. This non-update
+ would not have been written to the
+ undo log, and thus the record cannot
+ be rolled back.
+
+ However, because we have not executed
+ mtr_commit(mtr) yet, the update will
+ not be replayed in crash recovery, and
+ the following assertion failure will
+ effectively "roll back" the operation. */
+ ut_a(err == DB_SUCCESS);
+ dtuple_big_rec_free(big_rec);
+ }
+
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_free(entry_heap);
} else {
- search_mode = mode | BTR_INSERT;
+ rec_t* insert_rec;
+
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED)
+ == BTR_MODIFY_LEAF);
+ err = btr_cur_optimistic_insert(
+ flags, &cursor, &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+ } else {
+ if (buf_LRU_buf_pool_running_out()) {
+
+ err = DB_LOCK_TABLE_FULL;
+ goto err_exit;
+ }
+
+ err = btr_cur_optimistic_insert(
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+
+ if (err == DB_FAIL) {
+ err = btr_cur_pessimistic_insert(
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+ }
+ }
+
+ if (UNIV_LIKELY_NULL(big_rec)) {
+ mtr_commit(&mtr);
+
+ /* Online table rebuild could read (and
+ ignore) the incomplete record at this point.
+ If online rebuild is in progress, the
+ row_ins_index_entry_big_rec() will write log. */
+
+ DBUG_EXECUTE_IF(
+ "row_ins_extern_checkpoint",
+ log_make_checkpoint_at(
+ IB_ULONGLONG_MAX, TRUE););
+ err = row_ins_index_entry_big_rec(
+ entry, big_rec, offsets, &offsets_heap, index,
+ thr_get_trx(thr)->mysql_thd,
+ __FILE__, __LINE__);
+ dtuple_convert_back_big_rec(index, entry, big_rec);
+ } else {
+ if (err == DB_SUCCESS
+ && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(
+ insert_rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+ }
+ }
+
+func_exit:
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
+ return(err);
+}
+
+/***************************************************************//**
+Starts a mini-transaction and checks if the index will be dropped.
+@return true if the index is to be dropped */
+static __attribute__((nonnull, warn_unused_result))
+bool
+row_ins_sec_mtr_start_and_check_if_aborted(
+/*=======================================*/
+ mtr_t* mtr, /*!< out: mini-transaction */
+ dict_index_t* index, /*!< in/out: secondary index */
+ bool check, /*!< in: whether to check */
+ ulint search_mode)
+			/*!< in: search mode flags (checked for BTR_ALREADY_S_LATCHED) */
+{
+ ut_ad(!dict_index_is_clust(index));
+
+ mtr_start(mtr);
+
+ if (!check) {
+ return(false);
+ }
+
+ if (search_mode & BTR_ALREADY_S_LATCHED) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ } else {
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ }
+
+ switch (index->online_status) {
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ return(true);
+ case ONLINE_INDEX_COMPLETE:
+ return(false);
+ case ONLINE_INDEX_CREATION:
+ break;
+ }
+
+ ut_error;
+ return(true);
+}
+
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: secondary index */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
+ row_log_table_apply(), or 0 */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ btr_cur_t cursor;
+ ulint search_mode = mode | BTR_INSERT;
+ dberr_t err = DB_SUCCESS;
+ ulint n_unique;
+ mtr_t mtr;
+ ulint* offsets = NULL;
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
+
+ cursor.thr = thr;
+ ut_ad(thr_get_trx(thr)->id);
+ mtr_start(&mtr);
+
+ /* Ensure that we acquire index->lock when inserting into an
+ index with index->online_status == ONLINE_INDEX_COMPLETE, but
+ could still be subject to rollback_inplace_alter_table().
+ This prevents a concurrent change of index->online_status.
+ The memory object cannot be freed as long as we have an open
+ reference to the table, or index->table->n_ref_count > 0. */
+ const bool check = *index->name == TEMP_INDEX_PREFIX;
+ if (check) {
+ DEBUG_SYNC_C("row_ins_sec_index_enter");
+ if (mode == BTR_MODIFY_LEAF) {
+ search_mode |= BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(
+ index, entry, thr_get_trx(thr)->id)) {
+ goto func_exit;
+ }
+ }
+
+ /* Note that we use PAGE_CUR_LE as the search mode, because then
+ the function will return in both low_match and up_match of the
+ cursor sensible values */
+
+ if (!thr_get_trx(thr)->check_unique_secondary) {
+ search_mode |= BTR_IGNORE_SEC_UNIQUE;
}
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2151,13 +2625,8 @@ row_ins_index_entry_low(
&cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- /* The insertion was made to the insert buffer already during
- the search: we are done */
-
- ut_ad(search_mode & BTR_INSERT);
- err = DB_SUCCESS;
-
- goto function_exit;
+ /* The insert was buffered during the search: we are done */
+ goto func_exit;
}
#ifdef UNIV_DEBUG
@@ -2174,213 +2643,250 @@ row_ins_index_entry_low(
n_unique = dict_index_get_n_unique(index);
- if (dict_index_is_unique(index) && (cursor.up_match >= n_unique
- || cursor.low_match >= n_unique)) {
+ if (dict_index_is_unique(index)
+ && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) {
+ mtr_commit(&mtr);
- if (dict_index_is_clust(index)) {
- /* Note that the following may return also
- DB_LOCK_WAIT */
+ DEBUG_SYNC_C("row_ins_sec_index_unique");
- err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
- if (err != DB_SUCCESS) {
+ if (row_ins_sec_mtr_start_and_check_if_aborted(
+ &mtr, index, check, search_mode)) {
+ goto func_exit;
+ }
- goto function_exit;
- }
- } else {
- mtr_commit(&mtr);
- err = row_ins_scan_sec_index_for_duplicate(
- index, entry, thr);
- mtr_start(&mtr);
+ err = row_ins_scan_sec_index_for_duplicate(
+ flags, index, entry, thr, check, &mtr, offsets_heap);
- if (err != DB_SUCCESS) {
- goto function_exit;
+ mtr_commit(&mtr);
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ ut_ad(!thr_get_trx(thr)
+ ->dict_operation_lock_mode);
+ mutex_enter(&dict_sys->mutex);
+ dict_set_corrupted_index_cache_only(
+ index, index->table);
+ mutex_exit(&dict_sys->mutex);
+ /* Do not return any error to the
+ caller. The duplicate will be reported
+ by ALTER TABLE or CREATE UNIQUE INDEX.
+ Unfortunately we cannot report the
+ duplicate key value to the DDL thread,
+ because the altered_table object is
+ private to its call stack. */
+ err = DB_SUCCESS;
}
+ /* fall through */
+ default:
+ return(err);
+ }
- /* We did not find a duplicate and we have now
- locked with s-locks the necessary records to
- prevent any insertion of a duplicate by another
- transaction. Let us now reposition the cursor and
- continue the insertion. */
-
- btr_cur_search_to_nth_level(index, 0, entry,
- PAGE_CUR_LE,
- mode | BTR_INSERT,
- &cursor, 0,
- __FILE__, __LINE__, &mtr);
+ if (row_ins_sec_mtr_start_and_check_if_aborted(
+ &mtr, index, check, search_mode)) {
+ goto func_exit;
}
- }
- modify = row_ins_must_modify_rec(&cursor);
+ /* We did not find a duplicate and we have now
+ locked with s-locks the necessary records to
+ prevent any insertion of a duplicate by another
+ transaction. Let us now reposition the cursor and
+ continue the insertion. */
+
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE),
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+ }
- if (modify) {
+ if (row_ins_must_modify_rec(&cursor)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(&cursor), index, offsets,
+ ULINT_UNDEFINED, &offsets_heap);
- if (dict_index_is_clust(index)) {
- err = row_ins_clust_index_entry_by_modify(
- mode, &cursor, &heap, &big_rec, entry,
- thr, &mtr);
-
- if (big_rec) {
- ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the
- tablespace, it will not be redo
- logged, in either mini-transaction.
- Tablespace extension should be
- redo-logged in the big_rec
- mini-transaction, so that recovery
- will not fail when the big_rec was
- written to the extended portion of the
- file, in case the file was somehow
- truncated in the crash. */
-
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(
- rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_upd_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr,
- BTR_STORE_INSERT_UPDATE);
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_upd_extern");
- /* If writing big_rec fails (for
- example, because of DB_OUT_OF_FILE_SPACE),
- the record will be corrupted. Even if
- we did not update any externally
- stored columns, our update could cause
- the record to grow so that a
- non-updated column was selected for
- external storage. This non-update
- would not have been written to the
- undo log, and thus the record cannot
- be rolled back.
-
- However, because we have not executed
- mtr_commit(mtr) yet, the update will
- not be replayed in crash recovery, and
- the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
- goto stored_big_rec;
- }
- } else {
- ut_ad(!n_ext);
- err = row_ins_sec_index_entry_by_modify(
- mode, &cursor, entry, thr, &mtr);
- }
+ err = row_ins_sec_index_entry_by_modify(
+ flags, mode, &cursor, &offsets,
+ offsets_heap, heap, entry, thr, &mtr);
} else {
+ rec_t* insert_rec;
+ big_rec_t* big_rec;
+
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
+ flags, &cursor, &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
} else {
- ut_a(mode == BTR_MODIFY_TREE);
+ ut_ad(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
err = DB_LOCK_TABLE_FULL;
-
- goto function_exit;
+ goto func_exit;
}
err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
-
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- 0, &cursor, entry, &insert_rec,
- &big_rec, n_ext, thr, &mtr);
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
}
}
+
+ if (err == DB_SUCCESS && trx_id) {
+ page_update_max_trx_id(
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
+
+ ut_ad(!big_rec);
}
-function_exit:
+func_exit:
mtr_commit(&mtr);
+ return(err);
+}
- if (UNIV_LIKELY_NULL(big_rec)) {
- DBUG_EXECUTE_IF(
- "row_ins_extern_checkpoint",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE););
-
- mtr_start(&mtr);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_extern_latch");
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- __FILE__, __LINE__, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_extern");
-
-stored_big_rec:
- if (modify) {
- dtuple_big_rec_free(big_rec);
- } else {
- dtuple_convert_back_big_rec(index, entry, big_rec);
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+ const dtuple_t* entry, /*!< in/out: index entry to insert */
+ const big_rec_t* big_rec,/*!< in: externally stored fields */
+ ulint* offsets,/*!< in/out: rec offsets */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ dict_index_t* index, /*!< in: index */
+ const char* file, /*!< in: file name of caller */
+#ifndef DBUG_OFF
+ const void* thd, /*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+ ulint line) /*!< in: line number of caller */
+{
+ mtr_t mtr;
+ btr_cur_t cursor;
+ rec_t* rec;
+ dberr_t error;
+
+ ut_ad(dict_index_is_clust(index));
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
+
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ file, line, &mtr);
+ rec = btr_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, heap);
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
+ error = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
+ DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
+
+ if (error == DB_SUCCESS
+ && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+
+ return(error);
+}
+
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ ulint n_ext) /*!< in: number of externally stored columns */
+{
+ dberr_t err;
+ ulint n_uniq;
+
+ if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ err = row_ins_check_foreign_constraints(
+ index->table, index, entry, thr);
+ if (err != DB_SUCCESS) {
+
+ return(err);
}
+ }
- mtr_commit(&mtr);
+ n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;
+
+ /* Try first optimistic descent to the B-tree */
+
+ log_free_check();
+
+ err = row_ins_clust_index_entry_low(
+ 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr);
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_clust_index_entry_leaf");
}
+#endif /* UNIV_DEBUG */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (err != DB_FAIL) {
+ DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
+ return(err);
}
- return(err);
+
+ /* Try then pessimistic descent to the B-tree */
+
+ log_free_check();
+
+ return(row_ins_clust_index_entry_low(
+ 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr));
}
/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+ dict_index_t* index, /*!< in: secondary index */
dtuple_t* entry, /*!< in/out: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints
- (foreign=FALSE only during CREATE INDEX) */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
+ mem_heap_t* offsets_heap;
+ mem_heap_t* heap;
- if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
err = row_ins_check_foreign_constraints(index->table, index,
entry, thr);
if (err != DB_SUCCESS) {
@@ -2389,26 +2895,59 @@ row_ins_index_entry(
}
}
+ ut_ad(thr_get_trx(thr)->id);
+
+ offsets_heap = mem_heap_create(1024);
+ heap = mem_heap_create(1024);
+
/* Try first optimistic descent to the B-tree */
- err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
- n_ext, thr);
- if (err != DB_FAIL) {
+ log_free_check();
- return(err);
- }
+ err = row_ins_sec_index_entry_low(
+ 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr);
+ if (err == DB_FAIL) {
+ mem_heap_empty(heap);
- /* Try then pessimistic descent to the B-tree */
+ /* Try then pessimistic descent to the B-tree */
+
+ log_free_check();
- err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
- n_ext, thr);
+ err = row_ins_sec_index_entry_low(
+ 0, BTR_MODIFY_TREE, index,
+ offsets_heap, heap, entry, 0, thr);
+ }
+
+ mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
return(err);
}
+/***************************************************************//**
+Inserts an index entry to index. Tries first optimistic, then pessimistic
+descent down the tree. If the entry matches enough to a delete marked record,
+performs the insert by updating or delete unmarking the delete marked
+record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+static
+dberr_t
+row_ins_index_entry(
+/*================*/
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ if (dict_index_is_clust(index)) {
+ return(row_ins_clust_index_entry(index, entry, thr, 0));
+ } else {
+ return(row_ins_sec_index_entry(index, entry, thr));
+ }
+}
+
/***********************************************************//**
Sets the values of the dtuple fields in entry from the values of appropriate
columns in row. */
-static
+static __attribute__((nonnull))
void
row_ins_index_entry_set_vals(
/*=========================*/
@@ -2419,8 +2958,6 @@ row_ins_index_entry_set_vals(
ulint n_fields;
ulint i;
- ut_ad(entry && row);
-
n_fields = dtuple_get_n_fields(entry);
for (i = 0; i < n_fields; i++) {
@@ -2463,14 +3000,14 @@ row_ins_index_entry_set_vals(
Inserts a single index entry to the table.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_index_entry_step(
/*=====================*/
ins_node_t* node, /*!< in: row insert node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ut_ad(dtuple_check_typed(node->row));
@@ -2478,7 +3015,16 @@ row_ins_index_entry_step(
ut_ad(dtuple_check_typed(node->entry));
- err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr);
+ err = row_ins_index_entry(node->index, node->entry, thr);
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_index_entry_step");
+ }
+#endif /* UNIV_DEBUG */
return(err);
}
@@ -2577,16 +3123,14 @@ row_ins_get_row_from_select(
Inserts a row to a table.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins(
/*====*/
ins_node_t* node, /*!< in: row insert node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
-
- ut_ad(node && thr);
+ dberr_t err;
if (node->state == INS_NODE_ALLOC_ROW_ID) {
@@ -2622,6 +3166,10 @@ row_ins(
node->index = dict_table_get_next_index(node->index);
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+ DBUG_EXECUTE_IF(
+ "row_ins_skip_sec",
+ node->index = NULL; node->entry = NULL; break;);
+
/* Skip corrupted secondary index and its entry */
while (node->index && dict_index_is_corrupted(node->index)) {
@@ -2651,7 +3199,7 @@ row_ins_step(
que_node_t* parent;
sel_node_t* sel_node;
trx_t* trx;
- ulint err;
+ dberr_t err;
ut_ad(thr);
@@ -2684,6 +3232,8 @@ row_ins_step(
if (node->state == INS_NODE_SET_IX_LOCK) {
+ node->state = INS_NODE_ALLOC_ROW_ID;
+
/* It may be that the current session has not yet started
its transaction, or it has been committed: */
@@ -2695,6 +3245,9 @@ row_ins_step(
err = lock_table(0, node->table, LOCK_IX, thr);
+ DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait",
+ err = DB_LOCK_WAIT;);
+
if (err != DB_SUCCESS) {
goto error_handling;
@@ -2702,8 +3255,6 @@ row_ins_step(
node->trx_id = trx->id;
same_trx:
- node->state = INS_NODE_ALLOC_ROW_ID;
-
if (node->ins_type == INS_SEARCHED) {
/* Reset the cursor */
sel_node->state = SEL_NODE_OPEN;
@@ -2732,7 +3283,7 @@ same_trx:
err = row_ins(node, thr);
error_handling:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
/* err == DB_LOCK_WAIT or SQL error detected */
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
new file mode 100644
index 00000000000..b373b70ab7a
--- /dev/null
+++ b/storage/innobase/row/row0log.cc
@@ -0,0 +1,3219 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0log.cc
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#include "row0log.h"
+
+#ifdef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#include "row0row.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0merge.h"
+#include "row0ext.h"
+#include "data0data.h"
+#include "que0que.h"
+#include "handler0alter.h"
+
+#include<set>
+
+/** Table row modification operations during online table rebuild.
+Delete-marked records are not copied to the rebuilt table. */
+enum row_tab_op {
+ /** Insert a record */
+ ROW_T_INSERT = 0x41,
+ /** Update a record in place */
+ ROW_T_UPDATE,
+ /** Delete (purge) a record */
+ ROW_T_DELETE
+};
+
+/** Index record modification operations during online index creation */
+enum row_op {
+ /** Insert a record */
+ ROW_OP_INSERT = 0x61,
+ /** Delete a record */
+ ROW_OP_DELETE
+};
+
+#ifdef UNIV_DEBUG
+/** Write information about the applied record to the error log */
+# define ROW_LOG_APPLY_PRINT
+#endif /* UNIV_DEBUG */
+
+#ifdef ROW_LOG_APPLY_PRINT
+/** When set, write information about the applied record to the error log */
+static bool row_log_apply_print;
+#endif /* ROW_LOG_APPLY_PRINT */
+
+/** Size of the modification log entry header, in bytes */
+#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
+
+/** Log block for modifications during online index creation */
+struct row_log_buf_t {
+ byte* block; /*!< file block buffer */
+ mrec_buf_t buf; /*!< buffer for accessing a record
+ that spans two blocks */
+ ulint blocks; /*!< current position in blocks */
+ ulint bytes; /*!< current position within buf */
+};
+
+/** Set of transactions that rolled back inserts of BLOBs during
+online table rebuild */
+typedef std::set<trx_id_t> trx_id_set;
+
+/** @brief Buffer for logging modifications during online index creation
+
+All modifications to an index that is being created will be logged by
+row_log_online_op() to this buffer.
+
+All modifications to a table that is being rebuilt will be logged by
+row_log_table_delete(), row_log_table_update(), row_log_table_insert()
+to this buffer.
+
+When head.blocks == tail.blocks, the reader will access tail.block
+directly. When also head.bytes == tail.bytes, both counts will be
+reset to 0 and the file will be truncated. */
+struct row_log_t {
+ int fd; /*!< file descriptor */
+ ib_mutex_t mutex; /*!< mutex protecting trx_log, error,
+ max_trx and tail */
+ trx_id_set* trx_rb; /*!< set of transactions that rolled back
+ inserts of BLOBs during online table rebuild;
+ protected by mutex */
+ dict_table_t* table; /*!< table that is being rebuilt,
+ or NULL when this is a secondary
+ index that is being created online */
+ bool same_pk;/*!< whether the definition of the PRIMARY KEY
+ has remained the same */
+ const dtuple_t* add_cols;
+ /*!< default values of added columns, or NULL */
+ const ulint* col_map;/*!< mapping of old column numbers to
+ new ones, or NULL if !table */
+ dberr_t error; /*!< error that occurred during online
+ table rebuild */
+ trx_id_t max_trx;/*!< biggest observed trx_id in
+ row_log_online_op();
+ protected by mutex and index->lock S-latch,
+ or by index->lock X-latch only */
+ row_log_buf_t tail; /*!< writer context;
+ protected by mutex and index->lock S-latch,
+ or by index->lock X-latch only */
+ row_log_buf_t head; /*!< reader context; protected by MDL only;
+ modifiable by row_log_apply_ops() */
+ ulint size; /*!< allocated size */
+};
+
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+{
+ byte* b;
+ ulint extra_size;
+ ulint size;
+ ulint mrec_size;
+ ulint avail_size;
+ row_log_t* log;
+
+ ut_ad(dtuple_validate(tuple));
+ ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)) {
+ return;
+ }
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ /* Compute the size of the record. This differs from
+ row_merge_buf_encode(), because here we do not encode
+ extra_size+1 (and reserve 0 as the end-of-chunk marker). */
+
+ size = rec_get_converted_size_temp(
+ index, tuple->fields, tuple->n_fields, &extra_size);
+ ut_ad(size >= extra_size);
+ ut_ad(size <= sizeof log->tail.buf);
+
+ mrec_size = ROW_LOG_HEADER_SIZE
+ + (extra_size >= 0x80) + size
+ + (trx_id ? DATA_TRX_ID_LEN : 0);
+
+ log = index->online_log;
+ mutex_enter(&log->mutex);
+
+ if (trx_id > log->max_trx) {
+ log->max_trx = trx_id;
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+
+ ut_ad(log->tail.bytes < srv_sort_buf_size);
+ avail_size = srv_sort_buf_size - log->tail.bytes;
+
+ if (mrec_size > avail_size) {
+ b = log->tail.buf;
+ } else {
+ b = log->tail.block + log->tail.bytes;
+ }
+
+ if (trx_id != 0) {
+ *b++ = ROW_OP_INSERT;
+ trx_write_trx_id(b, trx_id);
+ b += DATA_TRX_ID_LEN;
+ } else {
+ *b++ = ROW_OP_DELETE;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = (byte) extra_size;
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = (byte) (0x80 | (extra_size >> 8));
+ *b++ = (byte) extra_size;
+ }
+
+ rec_convert_dtuple_to_temp(
+ b + extra_size, index, tuple->fields, tuple->n_fields);
+ b += size;
+
+ if (mrec_size >= avail_size) {
+ const os_offset_t byte_offset
+ = (os_offset_t) log->tail.blocks
+ * srv_sort_buf_size;
+ ibool ret;
+
+ if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
+ goto write_failed;
+ }
+
+ if (mrec_size == avail_size) {
+ ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ } else {
+ ut_ad(b == log->tail.buf + mrec_size);
+ memcpy(log->tail.block + log->tail.bytes,
+ log->tail.buf, avail_size);
+ }
+ UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+ ret = os_file_write(
+ "(modification log)",
+ OS_FILE_FROM_FD(log->fd),
+ log->tail.block, byte_offset, srv_sort_buf_size);
+ log->tail.blocks++;
+ if (!ret) {
+write_failed:
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ }
+ UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ memcpy(log->tail.block, log->tail.buf + avail_size,
+ mrec_size - avail_size);
+ log->tail.bytes = mrec_size - avail_size;
+ } else {
+ log->tail.bytes += mrec_size;
+ ut_ad(b == log->tail.block + log->tail.bytes);
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+ mutex_exit(&log->mutex);
+}
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+ const dict_index_t* index) /*!< in: clustered index of a table
+ that is being rebuilt online */
+{
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ return(index->online_log->error);
+}
+
+/******************************************************//**
+Starts logging an operation to a table that is being rebuilt.
+@return pointer to log, or NULL if no logging is necessary */
+static __attribute__((nonnull, warn_unused_result))
+byte*
+row_log_table_open(
+/*===============*/
+ row_log_t* log, /*!< in/out: online rebuild log */
+ ulint size, /*!< in: size of log record */
+ ulint* avail) /*!< out: available size for log record */
+{
+ mutex_enter(&log->mutex);
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+
+ if (log->error != DB_SUCCESS) {
+ mutex_exit(&log->mutex);
+ return(NULL);
+ }
+
+ ut_ad(log->tail.bytes < srv_sort_buf_size);
+ *avail = srv_sort_buf_size - log->tail.bytes;
+
+ if (size > *avail) {
+ return(log->tail.buf);
+ } else {
+ return(log->tail.block + log->tail.bytes);
+ }
+}
+
+/******************************************************//**
+Stops logging an operation to a table that is being rebuilt. */
+static __attribute__((nonnull))
+void
+row_log_table_close_func(
+/*=====================*/
+ row_log_t* log, /*!< in/out: online rebuild log */
+#ifdef UNIV_DEBUG
+ const byte* b, /*!< in: end of log record */
+#endif /* UNIV_DEBUG */
+ ulint size, /*!< in: size of log record */
+ ulint avail) /*!< in: available size for log record */
+{
+ ut_ad(mutex_own(&log->mutex));
+
+ if (size >= avail) {
+ const os_offset_t byte_offset
+ = (os_offset_t) log->tail.blocks
+ * srv_sort_buf_size;
+ ibool ret;
+
+ if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
+ goto write_failed;
+ }
+
+ if (size == avail) {
+ ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ } else {
+ ut_ad(b == log->tail.buf + size);
+ memcpy(log->tail.block + log->tail.bytes,
+ log->tail.buf, avail);
+ }
+ UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+ ret = os_file_write(
+ "(modification log)",
+ OS_FILE_FROM_FD(log->fd),
+ log->tail.block, byte_offset, srv_sort_buf_size);
+ log->tail.blocks++;
+ if (!ret) {
+write_failed:
+ log->error = DB_ONLINE_LOG_TOO_BIG;
+ }
+ UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ memcpy(log->tail.block, log->tail.buf + avail, size - avail);
+ log->tail.bytes = size - avail;
+ } else {
+ log->tail.bytes += size;
+ ut_ad(b == log->tail.block + log->tail.bytes);
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+ mutex_exit(&log->mutex);
+}
+
+#ifdef UNIV_DEBUG
+# define row_log_table_close(log, b, size, avail) \
+ row_log_table_close_func(log, b, size, avail)
+#else /* UNIV_DEBUG */
+# define row_log_table_close(log, b, size, avail) \
+ row_log_table_close_func(log, size, avail)
+#endif /* UNIV_DEBUG */
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
+ it was deleted */
+{
+ ulint old_pk_extra_size;
+ ulint old_pk_size;
+ ulint ext_size = 0;
+ ulint mrec_size;
+ ulint avail_size;
+ mem_heap_t* heap = NULL;
+ const dtuple_t* old_pk;
+ row_ext_t* ext;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
+ ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)
+ || !dict_index_is_online_ddl(index)
+ || index->online_log->error != DB_SUCCESS) {
+ return;
+ }
+
+ dict_table_t* new_table = index->online_log->table;
+ dict_index_t* new_index = dict_table_get_first_index(new_table);
+
+ ut_ad(dict_index_is_clust(new_index));
+ ut_ad(!dict_index_is_online_ddl(new_index));
+
+ /* Create the tuple PRIMARY KEY, DB_TRX_ID in the new_table. */
+ if (index->online_log->same_pk) {
+ byte* db_trx_id;
+ dtuple_t* tuple;
+ ut_ad(new_index->n_uniq == index->n_uniq);
+
+ /* The PRIMARY KEY and DB_TRX_ID are in the first
+ fields of the record. */
+ heap = mem_heap_create(
+ DATA_TRX_ID_LEN
+ + DTUPLE_EST_ALLOC(new_index->n_uniq + 1));
+ old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 1);
+ dict_index_copy_types(tuple, new_index, tuple->n_fields);
+ dtuple_set_n_fields_cmp(tuple, new_index->n_uniq);
+
+ for (ulint i = 0; i < new_index->n_uniq; i++) {
+ ulint len;
+ const void* field = rec_get_nth_field(
+ rec, offsets, i, &len);
+ dfield_t* dfield = dtuple_get_nth_field(
+ tuple, i);
+ ut_ad(len != UNIV_SQL_NULL);
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ dfield_set_data(dfield, field, len);
+ }
+
+ db_trx_id = static_cast<byte*>(
+ mem_heap_alloc(heap, DATA_TRX_ID_LEN));
+ trx_write_trx_id(db_trx_id, trx_id);
+
+ dfield_set_data(dtuple_get_nth_field(tuple, new_index->n_uniq),
+ db_trx_id, DATA_TRX_ID_LEN);
+ } else {
+ /* The PRIMARY KEY has changed. Translate the tuple. */
+ dfield_t* dfield;
+
+ old_pk = row_log_table_get_pk(rec, index, offsets, &heap);
+
+ if (!old_pk) {
+ ut_ad(index->online_log->error != DB_SUCCESS);
+ return;
+ }
+
+ /* Remove DB_ROLL_PTR. */
+ ut_ad(dtuple_get_n_fields_cmp(old_pk)
+ == dict_index_get_n_unique(new_index));
+ ut_ad(dtuple_get_n_fields(old_pk)
+ == dict_index_get_n_unique(new_index) + 2);
+ const_cast<ulint&>(old_pk->n_fields)--;
+
+ /* Overwrite DB_TRX_ID with the old trx_id. */
+ dfield = dtuple_get_nth_field(old_pk, new_index->n_uniq);
+ ut_ad(dfield_get_type(dfield)->mtype == DATA_SYS);
+ ut_ad(dfield_get_type(dfield)->prtype
+ == (DATA_NOT_NULL | DATA_TRX_ID));
+ ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN);
+ trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id);
+ }
+
+ ut_ad(dtuple_get_n_fields(old_pk) > 1);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+
+ mrec_size = 4 + old_pk_size;
+
+ /* If the row is marked as rollback, we will need to
+ log the enough prefix of the BLOB unless both the
+ old and new table are in COMPACT or REDUNDANT format */
+ if ((dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ || dict_table_get_format(new_table) >= UNIV_FORMAT_B)
+ && row_log_table_is_rollback(index, trx_id)) {
+ if (rec_offs_any_extern(offsets)) {
+ /* Build a cache of those off-page column
+ prefixes that are referenced by secondary
+ indexes. It can be that none of the off-page
+ columns are needed. */
+ row_build(ROW_COPY_DATA, index, rec,
+ offsets, NULL, NULL, NULL, &ext, heap);
+ if (ext) {
+ /* Log the row_ext_t, ext->ext and ext->buf */
+ ext_size = ext->n_ext * ext->max_len
+ + sizeof(*ext)
+ + ext->n_ext * sizeof(ulint)
+ + (ext->n_ext - 1) * sizeof ext->len;
+ mrec_size += ext_size;
+ }
+ }
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = ROW_T_DELETE;
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ /* Log the size of external prefix we saved */
+ mach_write_to_2(b, ext_size);
+ b += 2;
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+
+ b += old_pk_size;
+
+ if (ext_size) {
+ ulint cur_ext_size = sizeof(*ext)
+ + (ext->n_ext - 1) * sizeof ext->len;
+
+ memcpy(b, ext, cur_ext_size);
+ b += cur_ext_size;
+
+ /* Check if we need to col_map to adjust the column
+ number. If columns were added/removed/reordered,
+ adjust the column number. */
+ if (const ulint* col_map =
+ index->online_log->col_map) {
+ for (ulint i = 0; i < ext->n_ext; i++) {
+ const_cast<ulint&>(ext->ext[i]) =
+ col_map[ext->ext[i]];
+ }
+ }
+
+ memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
+ b += ext->n_ext * sizeof(*ext->ext);
+
+ ext_size -= cur_ext_size
+ + ext->n_ext * sizeof(*ext->ext);
+ memcpy(b, ext->buf, ext_size);
+ b += ext_size;
+ }
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+
+ mem_heap_free(heap);
+}
+
+/******************************************************//**
+Logs an insert or update to a table that is being rebuilt. */
+static __attribute__((nonnull(1,2,3)))
+void
+row_log_table_low_redundant(
+/*========================*/
+ const rec_t* rec, /*!< in: clustered index leaf
+ page record in ROW_FORMAT=REDUNDANT,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool insert, /*!< in: true if insert,
+ false if update */
+ const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value
+ (if !insert and a PRIMARY KEY
+ is being created) */
+ const dict_index_t* new_index)
+ /*!< in: clustered index of the
+ new table, not latched */
+{
+ ulint old_pk_size;
+ ulint old_pk_extra_size;
+ ulint size;
+ ulint extra_size;
+ ulint mrec_size;
+ ulint avail_size;
+ mem_heap_t* heap = NULL;
+ dtuple_t* tuple;
+
+ ut_ad(!page_is_comp(page_align(rec)));
+ ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
+
+ heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
+ tuple = dtuple_create(heap, index->n_fields);
+ dict_index_copy_types(tuple, index, index->n_fields);
+ dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
+
+ if (rec_get_1byte_offs_flag(rec)) {
+ for (ulint i = 0; i < index->n_fields; i++) {
+ dfield_t* dfield;
+ ulint len;
+ const void* field;
+
+ dfield = dtuple_get_nth_field(tuple, i);
+ field = rec_get_nth_field_old(rec, i, &len);
+
+ dfield_set_data(dfield, field, len);
+ }
+ } else {
+ for (ulint i = 0; i < index->n_fields; i++) {
+ dfield_t* dfield;
+ ulint len;
+ const void* field;
+
+ dfield = dtuple_get_nth_field(tuple, i);
+ field = rec_get_nth_field_old(rec, i, &len);
+
+ dfield_set_data(dfield, field, len);
+
+ if (rec_2_is_field_extern(rec, i)) {
+ dfield_set_ext(dfield);
+ }
+ }
+ }
+
+ size = rec_get_converted_size_temp(
+ index, tuple->fields, tuple->n_fields, &extra_size);
+
+ mrec_size = ROW_LOG_HEADER_SIZE + size + (extra_size >= 0x80);
+
+ if (insert || index->online_log->same_pk) {
+ ut_ad(!old_pk);
+ old_pk_extra_size = old_pk_size = 0;
+ } else {
+ ut_ad(old_pk);
+ ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 2)->len);
+ ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+ mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = static_cast<byte>(extra_size);
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
+ *b++ = static_cast<byte>(extra_size);
+ }
+
+ rec_convert_dtuple_to_temp(
+ b + extra_size, index, tuple->fields, tuple->n_fields);
+ b += size;
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+
+ mem_heap_free(heap);
+}
+
+/******************************************************//**
+Logs an insert or update to a table that is being rebuilt. */
+static __attribute__((nonnull(1,2,3)))
+void
+row_log_table_low(
+/*==============*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool insert, /*!< in: true if insert, false if update */
+ const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert
+ and a PRIMARY KEY is being created) */
+{
+ ulint omit_size;
+ ulint old_pk_size;
+ ulint old_pk_extra_size;
+ ulint extra_size;
+ ulint mrec_size;
+ ulint avail_size;
+ const dict_index_t* new_index = dict_table_get_first_index(
+ index->online_log->table);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_clust(new_index));
+ ut_ad(!dict_index_is_online_ddl(new_index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
+ ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+ ut_ad(page_is_leaf(page_align(rec)));
+ ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
+
+ if (dict_index_is_corrupted(index)
+ || !dict_index_is_online_ddl(index)
+ || index->online_log->error != DB_SUCCESS) {
+ return;
+ }
+
+ if (!rec_offs_comp(offsets)) {
+ row_log_table_low_redundant(
+ rec, index, offsets, insert, old_pk, new_index);
+ return;
+ }
+
+ ut_ad(page_is_comp(page_align(rec)));
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+
+ omit_size = REC_N_NEW_EXTRA_BYTES;
+
+ extra_size = rec_offs_extra_size(offsets) - omit_size;
+
+ mrec_size = rec_offs_size(offsets) - omit_size
+ + ROW_LOG_HEADER_SIZE + (extra_size >= 0x80);
+
+ if (insert || index->online_log->same_pk) {
+ ut_ad(!old_pk);
+ old_pk_extra_size = old_pk_size = 0;
+ } else {
+ ut_ad(old_pk);
+ ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 2)->len);
+ ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+ mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = static_cast<byte>(extra_size);
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
+ *b++ = static_cast<byte>(extra_size);
+ }
+
+ memcpy(b, rec - rec_offs_extra_size(offsets), extra_size);
+ b += extra_size;
+ memcpy(b, rec, rec_offs_data_size(offsets));
+ b += rec_offs_data_size(offsets);
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+}
+
+/******************************************************//**
+Logs an update to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ before the update */
+{
+ row_log_table_low(rec, index, offsets, false, old_pk);
+}
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ mem_heap_t** heap) /*!< in/out: memory heap where allocated */
+{
+ dtuple_t* tuple = NULL;
+ row_log_t* log = index->online_log;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(log);
+ ut_ad(log->table);
+
+ if (log->same_pk) {
+ /* The PRIMARY KEY columns are unchanged. */
+ return(NULL);
+ }
+
+ mutex_enter(&log->mutex);
+
+ /* log->error is protected by log->mutex. */
+ if (log->error == DB_SUCCESS) {
+ dict_table_t* new_table = log->table;
+ dict_index_t* new_index
+ = dict_table_get_first_index(new_table);
+ const ulint new_n_uniq
+ = dict_index_get_n_unique(new_index);
+
+ if (!*heap) {
+ ulint size = 0;
+
+ if (!offsets) {
+ size += (1 + REC_OFFS_HEADER_SIZE
+ + index->n_fields)
+ * sizeof *offsets;
+ }
+
+ for (ulint i = 0; i < new_n_uniq; i++) {
+ size += dict_col_get_min_size(
+ dict_index_get_nth_col(new_index, i));
+ }
+
+ *heap = mem_heap_create(
+ DTUPLE_EST_ALLOC(new_n_uniq + 2) + size);
+ }
+
+ if (!offsets) {
+ offsets = rec_get_offsets(rec, index, NULL,
+ ULINT_UNDEFINED, heap);
+ }
+
+ tuple = dtuple_create(*heap, new_n_uniq + 2);
+ dict_index_copy_types(tuple, new_index, tuple->n_fields);
+ dtuple_set_n_fields_cmp(tuple, new_n_uniq);
+
+ for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
+ dict_field_t* ifield;
+ dfield_t* dfield;
+ const dict_col_t* new_col;
+ const dict_col_t* col;
+ ulint col_no;
+ ulint i;
+ ulint len;
+ const byte* field;
+
+ ifield = dict_index_get_nth_field(new_index, new_i);
+ dfield = dtuple_get_nth_field(tuple, new_i);
+ new_col = dict_field_get_col(ifield);
+ col_no = new_col->ind;
+
+ for (ulint old_i = 0; old_i < index->table->n_cols;
+ old_i++) {
+ if (col_no == log->col_map[old_i]) {
+ col_no = old_i;
+ goto copy_col;
+ }
+ }
+
+ /* No matching column was found in the old
+ table, so this must be an added column.
+ Copy the default value. */
+ ut_ad(log->add_cols);
+ dfield_copy(dfield,
+ dtuple_get_nth_field(
+ log->add_cols, col_no));
+ continue;
+
+copy_col:
+ col = dict_table_get_nth_col(index->table, col_no);
+
+ i = dict_col_get_clust_pos(col, index);
+
+ if (i == ULINT_UNDEFINED) {
+ ut_ad(0);
+ log->error = DB_CORRUPTION;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ field = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len == UNIV_SQL_NULL) {
+ log->error = DB_INVALID_NULL;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint field_len = ifield->prefix_len;
+ byte* blob_field;
+ const ulint max_len =
+ DICT_MAX_FIELD_LEN_BY_FORMAT(
+ new_table);
+
+ if (!field_len) {
+ field_len = ifield->fixed_len;
+ if (!field_len) {
+ field_len = max_len + 1;
+ }
+ }
+
+ blob_field = static_cast<byte*>(
+ mem_heap_alloc(*heap, field_len));
+
+ len = btr_copy_externally_stored_field_prefix(
+ blob_field, field_len,
+ dict_table_zip_size(index->table),
+ field, len);
+ if (len == max_len + 1) {
+ log->error = DB_TOO_BIG_INDEX_COL;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ dfield_set_data(dfield, blob_field, len);
+ } else {
+ if (ifield->prefix_len
+ && ifield->prefix_len < len) {
+ len = ifield->prefix_len;
+ }
+
+ dfield_set_data(
+ dfield,
+ mem_heap_dup(*heap, field, len), len);
+ }
+ }
+
+ const byte* trx_roll = rec
+ + row_get_trx_id_offset(index, offsets);
+
+ dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq),
+ trx_roll, DATA_TRX_ID_LEN);
+ dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1),
+ trx_roll + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
+ }
+
+func_exit:
+ mutex_exit(&log->mutex);
+ return(tuple);
+}
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+{
+ row_log_table_low(rec, index, offsets, true, NULL);
+}
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	trx_id_t	trx_id)	/*!< in: transaction being rolled back */
+{
+	ut_ad(dict_index_is_clust(index));
+#ifdef UNIV_DEBUG
+	ibool	corrupt	= FALSE;
+	ut_ad(trx_rw_is_active(trx_id, &corrupt));
+	ut_ad(!corrupt);
+#endif /* UNIV_DEBUG */
+
+	/* Protect transitions of index->online_status and access to
+	index->online_log. */
+	rw_lock_s_lock(&index->lock);
+
+	if (dict_index_is_online_ddl(index)) {
+		ut_ad(index->online_log);
+		ut_ad(index->online_log->table);
+		mutex_enter(&index->online_log->mutex);
+		trx_id_set*	trxs = index->online_log->trx_rb;
+
+		/* The set of rolled-back transactions is created
+		lazily, on the first rollback seen during the online
+		table rebuild; creation is serialized by the mutex. */
+		if (!trxs) {
+			index->online_log->trx_rb = trxs = new trx_id_set();
+		}
+
+		trxs->insert(trx_id);
+
+		mutex_exit(&index->online_log->mutex);
+	}
+
+	rw_lock_s_unlock(&index->lock);
+}
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	trx_id_t		trx_id)	/*!< in: transaction id */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(dict_index_is_online_ddl(index));
+	ut_ad(index->online_log);
+
+	/* Both the trx_rb pointer and the set contents are modified
+	under online_log->mutex in row_log_table_rollback().  Read the
+	pointer inside the mutex as well: reading it beforehand would
+	be a data race with the lazy "new trx_id_set()" assignment and
+	could miss a concurrently registered rollback. */
+	mutex_enter(&index->online_log->mutex);
+	const trx_id_set*	trxs = index->online_log->trx_rb;
+	const bool		is_rollback = trxs
+		&& trxs->find(trx_id) != trxs->end();
+	mutex_exit(&index->online_log->mutex);
+
+	return(is_rollback);
+}
+
+/******************************************************//**
+Converts a log record to a table row.
+@return converted row, or NULL if the conversion fails
+or the transaction has been rolled back */
+static __attribute__((nonnull, warn_unused_result))
+const dtuple_t*
+row_log_table_apply_convert_mrec(
+/*=============================*/
+	const mrec_t*		mrec,		/*!< in: merge record */
+	dict_index_t*		index,		/*!< in: index of mrec */
+	const ulint*		offsets,	/*!< in: offsets of mrec */
+	const row_log_t*	log,		/*!< in: rebuild context */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	trx_id_t		trx_id,		/*!< in: DB_TRX_ID of mrec */
+	dberr_t*		error)		/*!< out: DB_SUCCESS or
+						reason of failure */
+{
+	dtuple_t*	row;
+
+#ifdef UNIV_SYNC_DEBUG
+	/* This prevents BLOBs from being freed, in case an insert
+	transaction rollback starts after row_log_table_is_rollback(). */
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* A rolled-back transaction yields row=NULL but *error=DB_SUCCESS
+	(set at func_exit): the record is simply skipped, not an error. */
+	if (row_log_table_is_rollback(index, trx_id)) {
+		row = NULL;
+		goto func_exit;
+	}
+
+	/* This is based on row_build(). */
+	if (log->add_cols) {
+		row = dtuple_copy(log->add_cols, heap);
+		/* dict_table_copy_types() would set the fields to NULL */
+		for (ulint i = 0; i < dict_table_get_n_cols(log->table); i++) {
+			dict_col_copy_type(
+				dict_table_get_nth_col(log->table, i),
+				dfield_get_type(dtuple_get_nth_field(row, i)));
+		}
+	} else {
+		row = dtuple_create(heap, dict_table_get_n_cols(log->table));
+		dict_table_copy_types(row, log->table);
+	}
+
+	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
+		const dict_field_t*	ind_field
+			= dict_index_get_nth_field(index, i);
+
+		if (ind_field->prefix_len) {
+			/* Column prefixes can only occur in key
+			fields, which cannot be stored externally. For
+			a column prefix, there should also be the full
+			field in the clustered index tuple. The row
+			tuple comprises full fields, not prefixes. */
+			ut_ad(!rec_offs_nth_extern(offsets, i));
+			continue;
+		}
+
+		const dict_col_t*	col
+			= dict_field_get_col(ind_field);
+		ulint			col_no
+			= log->col_map[dict_col_get_no(col)];
+
+		if (col_no == ULINT_UNDEFINED) {
+			/* dropped column */
+			continue;
+		}
+
+		dfield_t*	dfield
+			= dtuple_get_nth_field(row, col_no);
+		ulint		len;
+		const void*	data;
+
+		if (rec_offs_nth_extern(offsets, i)) {
+			ut_ad(rec_offs_any_extern(offsets));
+			data = btr_rec_copy_externally_stored_field(
+				mrec, offsets,
+				dict_table_zip_size(index->table),
+				i, &len, heap);
+			ut_a(data);
+		} else {
+			data = rec_get_nth_field(mrec, offsets, i, &len);
+		}
+
+		dfield_set_data(dfield, data, len);
+
+		/* See if any columns were changed to NULL or NOT NULL. */
+		const dict_col_t*	new_col
+			= dict_table_get_nth_col(log->table, col_no);
+		ut_ad(new_col->mtype == col->mtype);
+
+		/* Assert that prtype matches except for nullability. */
+		ut_ad(!((new_col->prtype ^ col->prtype) & ~DATA_NOT_NULL));
+		ut_ad(!((new_col->prtype ^ dfield_get_type(dfield)->prtype)
+			& ~DATA_NOT_NULL));
+
+		if (new_col->prtype == col->prtype) {
+			continue;
+		}
+
+		if ((new_col->prtype & DATA_NOT_NULL)
+		    && dfield_is_null(dfield)) {
+			/* We got a NULL value for a NOT NULL column. */
+			*error = DB_INVALID_NULL;
+			return(NULL);
+		}
+
+		/* Adjust the DATA_NOT_NULL flag in the parsed row. */
+		dfield_get_type(dfield)->prtype = new_col->prtype;
+
+		ut_ad(dict_col_type_assert_equal(new_col,
+						 dfield_get_type(dfield)));
+	}
+
+func_exit:
+	*error = DB_SUCCESS;
+	return(row);
+}
+
+/******************************************************//**
+Replays an insert operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_insert_low(
+/*===========================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	const dtuple_t*	row,		/*!< in: table row
+					in the old table definition */
+	trx_id_t	trx_id,		/*!< in: trx_id of the row */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup)	/*!< in/out: for reporting
+					duplicate key errors */
+{
+	dberr_t		error;
+	dtuple_t*	entry;
+	const row_log_t*log	= dup->index->online_log;
+	dict_index_t*	index	= dict_table_get_first_index(log->table);
+
+	ut_ad(dtuple_validate(row));
+	ut_ad(trx_id);
+
+#ifdef ROW_LOG_APPLY_PRINT
+	if (row_log_apply_print) {
+		fprintf(stderr, "table apply insert "
+			IB_ID_FMT " " IB_ID_FMT "\n",
+			index->table->id, index->id);
+		dtuple_print(stderr, row);
+	}
+#endif /* ROW_LOG_APPLY_PRINT */
+
+	/* No locking or undo logging: the rebuilt table is visible to
+	this thread only, and DB_TRX_ID/DB_ROLL_PTR are carried over. */
+	static const ulint	flags
+		= (BTR_CREATE_FLAG
+		   | BTR_NO_LOCKING_FLAG
+		   | BTR_NO_UNDO_LOG_FLAG
+		   | BTR_KEEP_SYS_FLAG);
+
+	entry = row_build_index_entry(row, NULL, index, heap);
+
+	error = row_ins_clust_index_entry_low(
+		flags, BTR_MODIFY_TREE, index, index->n_uniq, entry, 0, thr);
+
+	switch (error) {
+	case DB_SUCCESS:
+		break;
+	case DB_SUCCESS_LOCKED_REC:
+		/* The row had already been copied to the table. */
+		return(DB_SUCCESS);
+	default:
+		return(error);
+	}
+
+	/* Insert the corresponding entries into every secondary index,
+	skipping fulltext indexes (maintained separately). */
+	do {
+		if (!(index = dict_table_get_next_index(index))) {
+			break;
+		}
+
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		entry = row_build_index_entry(row, NULL, index, heap);
+		error = row_ins_sec_index_entry_low(
+			flags, BTR_MODIFY_TREE,
+			index, offsets_heap, heap, entry, trx_id, thr);
+	} while (error == DB_SUCCESS);
+
+	return(error);
+}
+
+/******************************************************//**
+Replays an insert operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_insert(
+/*=======================*/
+	que_thr_t*		thr,		/*!< in: query graph */
+	const mrec_t*		mrec,		/*!< in: record to insert */
+	const ulint*		offsets,	/*!< in: offsets of mrec */
+	mem_heap_t*		offsets_heap,	/*!< in/out: memory heap
+						that can be emptied */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup,		/*!< in/out: for reporting
+						duplicate key errors */
+	trx_id_t		trx_id)		/*!< in: DB_TRX_ID of mrec */
+{
+	const row_log_t*log	= dup->index->online_log;
+	dberr_t		error;
+	const dtuple_t*	row	= row_log_table_apply_convert_mrec(
+		mrec, dup->index, offsets, log, heap, trx_id, &error);
+
+	ut_ad(error == DB_SUCCESS || !row);
+	/* Handling of duplicate key error requires storing
+	of offending key in a record buffer. */
+	ut_ad(error != DB_DUPLICATE_KEY);
+
+	if (error != DB_SUCCESS) {
+		return(error);
+	}
+
+	/* row == NULL with DB_SUCCESS means the transaction was rolled
+	back; skip the insert silently. */
+	if (row) {
+		error = row_log_table_apply_insert_low(
+			thr, row, trx_id, offsets_heap, heap, dup);
+		if (error != DB_SUCCESS) {
+			/* Report the erroneous row using the new
+			version of the table. */
+			innobase_row_to_mysql(dup->table, log->table, row);
+		}
+	}
+	return(error);
+}
+
+/******************************************************//**
+Deletes a record from a table that is being rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull(1, 2, 4, 5), warn_unused_result))
+dberr_t
+row_log_table_apply_delete_low(
+/*===========================*/
+	btr_pcur_t*		pcur,		/*!< in/out: B-tree cursor,
+						will be trashed */
+	const ulint*		offsets,	/*!< in: offsets on pcur */
+	const row_ext_t*	save_ext,	/*!< in: saved external field
+						info, or NULL */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	mtr_t*			mtr)		/*!< in/out: mini-transaction,
+						will be committed */
+{
+	dberr_t		error;
+	row_ext_t*	ext;
+	dtuple_t*	row;
+	dict_index_t*	index	= btr_pcur_get_btr_cur(pcur)->index;
+
+	ut_ad(dict_index_is_clust(index));
+
+#ifdef ROW_LOG_APPLY_PRINT
+	if (row_log_apply_print) {
+		fprintf(stderr, "table apply delete "
+			IB_ID_FMT " " IB_ID_FMT "\n",
+			index->table->id, index->id);
+		rec_print_new(stderr, btr_pcur_get_rec(pcur), offsets);
+	}
+#endif /* ROW_LOG_APPLY_PRINT */
+	if (dict_table_get_next_index(index)) {
+		/* Build a row template for purging secondary index entries. */
+		row = row_build(
+			ROW_COPY_DATA, index, btr_pcur_get_rec(pcur),
+			offsets, NULL, NULL, NULL,
+			save_ext ? NULL : &ext, heap);
+		/* Prefer the caller-supplied external field info;
+		otherwise use what row_build() extracted. */
+		if (!save_ext) {
+			save_ext = ext;
+		}
+	} else {
+		/* No secondary indexes: no row template is needed. */
+		row = NULL;
+	}
+
+	btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur),
+				   BTR_CREATE_FLAG, RB_NONE, mtr);
+	mtr_commit(mtr);
+
+	if (error != DB_SUCCESS) {
+		return(error);
+	}
+
+	/* Remove the matching entry from each secondary index,
+	each in its own mini-transaction. */
+	while ((index = dict_table_get_next_index(index)) != NULL) {
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		const dtuple_t*	entry = row_build_index_entry(
+			row, save_ext, index, heap);
+		mtr_start(mtr);
+		btr_pcur_open(index, entry, PAGE_CUR_LE,
+			      BTR_MODIFY_TREE, pcur, mtr);
+#ifdef UNIV_DEBUG
+		switch (btr_pcur_get_btr_cur(pcur)->flag) {
+		case BTR_CUR_DELETE_REF:
+		case BTR_CUR_DEL_MARK_IBUF:
+		case BTR_CUR_DELETE_IBUF:
+		case BTR_CUR_INSERT_TO_IBUF:
+			/* We did not request buffering. */
+			break;
+		case BTR_CUR_HASH:
+		case BTR_CUR_HASH_FAIL:
+		case BTR_CUR_BINARY:
+			goto flag_ok;
+		}
+		ut_ad(0);
+flag_ok:
+#endif /* UNIV_DEBUG */
+
+		if (page_rec_is_infimum(btr_pcur_get_rec(pcur))
+		    || btr_pcur_get_low_match(pcur) < index->n_uniq) {
+			/* All secondary index entries should be
+			found, because new_table is being modified by
+			this thread only, and all indexes should be
+			updated in sync. */
+			mtr_commit(mtr);
+			return(DB_INDEX_CORRUPT);
+		}
+
+		btr_cur_pessimistic_delete(&error, FALSE,
+					   btr_pcur_get_btr_cur(pcur),
+					   BTR_CREATE_FLAG, RB_NONE, mtr);
+		mtr_commit(mtr);
+	}
+
+	return(error);
+}
+
+/******************************************************//**
+Replays a delete operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result))
+dberr_t
+row_log_table_apply_delete(
+/*=======================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the new
+					clustered index */
+	const mrec_t*	mrec,		/*!< in: merge record */
+	const ulint*	moffsets,	/*!< in: offsets of mrec */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	dict_table_t*	new_table,	/*!< in: rebuilt table */
+	const row_ext_t*	save_ext)	/*!< in: saved external field
+					info, or NULL */
+{
+	dict_index_t*	index = dict_table_get_first_index(new_table);
+	dtuple_t*	old_pk;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	ulint*		offsets;
+
+	/* The log record carries PRIMARY KEY fields plus DB_TRX_ID. */
+	ut_ad(rec_offs_n_fields(moffsets)
+	      == dict_index_get_n_unique(index) + 1);
+	ut_ad(!rec_offs_any_extern(moffsets));
+
+	/* Convert the row to a search tuple. */
+	old_pk = dtuple_create(heap, index->n_uniq + 1);
+	dict_index_copy_types(old_pk, index, old_pk->n_fields);
+	dtuple_set_n_fields_cmp(old_pk, index->n_uniq);
+
+	for (ulint i = 0; i <= index->n_uniq; i++) {
+		ulint		len;
+		const void*	field;
+		field = rec_get_nth_field(mrec, moffsets, i, &len);
+		ut_ad(len != UNIV_SQL_NULL);
+		dfield_set_data(dtuple_get_nth_field(old_pk, i),
+				field, len);
+	}
+
+	mtr_start(&mtr);
+	btr_pcur_open(index, old_pk, PAGE_CUR_LE,
+		      BTR_MODIFY_TREE, &pcur, &mtr);
+#ifdef UNIV_DEBUG
+	switch (btr_pcur_get_btr_cur(&pcur)->flag) {
+	case BTR_CUR_DELETE_REF:
+	case BTR_CUR_DEL_MARK_IBUF:
+	case BTR_CUR_DELETE_IBUF:
+	case BTR_CUR_INSERT_TO_IBUF:
+		/* We did not request buffering. */
+		break;
+	case BTR_CUR_HASH:
+	case BTR_CUR_HASH_FAIL:
+	case BTR_CUR_BINARY:
+		goto flag_ok;
+	}
+	ut_ad(0);
+flag_ok:
+#endif /* UNIV_DEBUG */
+
+	if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
+	    || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
+all_done:
+		mtr_commit(&mtr);
+		/* The record was not found. All done. */
+		return(DB_SUCCESS);
+	}
+
+	offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL,
+				  ULINT_UNDEFINED, &offsets_heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+	/* Only remove the record if DB_TRX_ID matches what was
+	buffered. */
+
+	{
+		ulint		len;
+		const void*	mrec_trx_id
+			= rec_get_nth_field(mrec, moffsets, trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		const void*	rec_trx_id
+			= rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets,
+					    trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		/* A mismatch means the record was already replaced by a
+		later version; treat as "nothing to delete". */
+		if (memcmp(mrec_trx_id, rec_trx_id, DATA_TRX_ID_LEN)) {
+			goto all_done;
+		}
+	}
+
+	return(row_log_table_apply_delete_low(&pcur, offsets, save_ext,
+					      heap, &mtr));
+}
+
+/******************************************************//**
+Replays an update operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_update(
+/*=======================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the
+					old clustered index */
+	ulint		new_trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the new
+					clustered index */
+	const mrec_t*	mrec,		/*!< in: new value */
+	const ulint*	offsets,	/*!< in: offsets of mrec */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting
+					duplicate key errors */
+	trx_id_t	trx_id,		/*!< in: DB_TRX_ID of mrec */
+	const dtuple_t*	old_pk)		/*!< in: PRIMARY KEY and
+					DB_TRX_ID,DB_ROLL_PTR
+					of the old value,
+					or PRIMARY KEY if same_pk */
+{
+	const row_log_t*log	= dup->index->online_log;
+	const dtuple_t*	row;
+	dict_index_t*	index	= dict_table_get_first_index(log->table);
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	dberr_t		error;
+
+	ut_ad(dtuple_get_n_fields_cmp(old_pk)
+	      == dict_index_get_n_unique(index));
+	ut_ad(dtuple_get_n_fields(old_pk)
+	      == dict_index_get_n_unique(index)
+	      + (dup->index->online_log->same_pk ? 0 : 2));
+
+	row = row_log_table_apply_convert_mrec(
+		mrec, dup->index, offsets, log, heap, trx_id, &error);
+
+	ut_ad(error == DB_SUCCESS || !row);
+	/* Handling of duplicate key error requires storing
+	of offending key in a record buffer. */
+	ut_ad(error != DB_DUPLICATE_KEY);
+
+	/* NULL row covers both a conversion error and a rolled-back
+	transaction (error == DB_SUCCESS in the latter case). */
+	if (!row) {
+		return(error);
+	}
+
+	mtr_start(&mtr);
+	btr_pcur_open(index, old_pk, PAGE_CUR_LE,
+		      BTR_MODIFY_TREE, &pcur, &mtr);
+#ifdef UNIV_DEBUG
+	switch (btr_pcur_get_btr_cur(&pcur)->flag) {
+	case BTR_CUR_DELETE_REF:
+	case BTR_CUR_DEL_MARK_IBUF:
+	case BTR_CUR_DELETE_IBUF:
+	case BTR_CUR_INSERT_TO_IBUF:
+		/* Intentional fallthrough to break: the assertion
+		alone flags the unexpected buffering. */
+		ut_ad(0);/* We did not request buffering. */
+	case BTR_CUR_HASH:
+	case BTR_CUR_HASH_FAIL:
+	case BTR_CUR_BINARY:
+		break;
+	}
+#endif /* UNIV_DEBUG */
+
+	if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
+	    || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
+		mtr_commit(&mtr);
+insert:
+		/* Entry precondition for this label: the
+		mini-transaction must already be committed. */
+		ut_ad(mtr.state == MTR_COMMITTED);
+		/* The row was not found. Insert it. */
+		error = row_log_table_apply_insert_low(
+			thr, row, trx_id, offsets_heap, heap, dup);
+		if (error != DB_SUCCESS) {
+err_exit:
+			/* Report the erroneous row using the new
+			version of the table. */
+			innobase_row_to_mysql(dup->table, log->table, row);
+		}
+
+		return(error);
+	}
+
+	/* Update the record. */
+	ulint*		cur_offsets	= rec_get_offsets(
+		btr_pcur_get_rec(&pcur),
+		index, NULL, ULINT_UNDEFINED, &offsets_heap);
+
+	dtuple_t*	entry	= row_build_index_entry(
+		row, NULL, index, heap);
+	const upd_t*	update	= row_upd_build_difference_binary(
+		index, entry, btr_pcur_get_rec(&pcur), cur_offsets,
+		false, NULL, heap);
+
+	error = DB_SUCCESS;
+
+	if (!update->n_fields) {
+		/* Nothing to do. */
+		goto func_exit;
+	}
+
+	if (rec_offs_any_extern(cur_offsets)) {
+		/* If the record contains any externally stored
+		columns, perform the update by delete and insert,
+		because we will not write any undo log that would
+		allow purge to free any orphaned externally stored
+		columns. */
+delete_insert:
+		error = row_log_table_apply_delete_low(
+			&pcur, cur_offsets, NULL, heap, &mtr);
+		ut_ad(mtr.state == MTR_COMMITTED);
+
+		if (error != DB_SUCCESS) {
+			goto err_exit;
+		}
+
+		goto insert;
+	}
+
+	if (upd_get_nth_field(update, 0)->field_no < new_trx_id_col) {
+		if (dup->index->online_log->same_pk) {
+			/* The ROW_T_UPDATE log record should only be
+			written when the PRIMARY KEY fields of the
+			record did not change in the old table.  We
+			can only get a change of PRIMARY KEY columns
+			in the rebuilt table if the PRIMARY KEY was
+			redefined (!same_pk). */
+			ut_ad(0);
+			error = DB_CORRUPTION;
+			goto func_exit;
+		}
+
+		/* The PRIMARY KEY columns have changed.
+		Delete the record with the old PRIMARY KEY value,
+		provided that it carries the same
+		DB_TRX_ID,DB_ROLL_PTR. Then, insert the new row. */
+		ulint		len;
+		const byte*	cur_trx_roll	= rec_get_nth_field(
+			mrec, offsets, trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		const dfield_t*	new_trx_roll	= dtuple_get_nth_field(
+			old_pk, new_trx_id_col);
+		/* We assume that DB_TRX_ID,DB_ROLL_PTR are stored
+		in one contiguous block. */
+		ut_ad(rec_get_nth_field(mrec, offsets, trx_id_col + 1, &len)
+		      == cur_trx_roll + DATA_TRX_ID_LEN);
+		ut_ad(len == DATA_ROLL_PTR_LEN);
+		ut_ad(new_trx_roll->len == DATA_TRX_ID_LEN);
+		ut_ad(dtuple_get_nth_field(old_pk, new_trx_id_col + 1)
+		      -> len == DATA_ROLL_PTR_LEN);
+		ut_ad(static_cast<const byte*>(
+			      dtuple_get_nth_field(old_pk, new_trx_id_col + 1)
+			      ->data)
+		      == static_cast<const byte*>(new_trx_roll->data)
+		      + DATA_TRX_ID_LEN);
+
+		if (!memcmp(cur_trx_roll, new_trx_roll->data,
+			    DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) {
+			/* The old row exists. Remove it. */
+			goto delete_insert;
+		}
+
+		/* Unless we called row_log_table_apply_delete_low(),
+		this will likely cause a duplicate key error. */
+		mtr_commit(&mtr);
+		goto insert;
+	}
+
+	dtuple_t*	old_row;
+	row_ext_t*	old_ext;
+
+	if (dict_table_get_next_index(index)) {
+		/* Construct the row corresponding to the old value of
+		the record. */
+		old_row = row_build(
+			ROW_COPY_DATA, index, btr_pcur_get_rec(&pcur),
+			cur_offsets, NULL, NULL, NULL, &old_ext, heap);
+		ut_ad(old_row);
+#ifdef ROW_LOG_APPLY_PRINT
+		if (row_log_apply_print) {
+			fprintf(stderr, "table apply update "
+				IB_ID_FMT " " IB_ID_FMT "\n",
+				index->table->id, index->id);
+			dtuple_print(stderr, old_row);
+			dtuple_print(stderr, row);
+		}
+#endif /* ROW_LOG_APPLY_PRINT */
+	} else {
+		old_row = NULL;
+		old_ext = NULL;
+	}
+
+	big_rec_t*	big_rec;
+
+	error = btr_cur_pessimistic_update(
+		BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+		| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG
+		| BTR_KEEP_POS_FLAG,
+		btr_pcur_get_btr_cur(&pcur),
+		&cur_offsets, &offsets_heap, heap, &big_rec,
+		update, 0, NULL, 0, &mtr);
+
+	if (big_rec) {
+		if (error == DB_SUCCESS) {
+			error = btr_store_big_rec_extern_fields(
+				index, btr_pcur_get_block(&pcur),
+				btr_pcur_get_rec(&pcur), cur_offsets,
+				big_rec, &mtr, BTR_STORE_UPDATE);
+		}
+
+		dtuple_big_rec_free(big_rec);
+	}
+
+	/* Propagate the update to the secondary indexes: delete the
+	old entry and insert the new one wherever ordering fields
+	changed.  Invariant: the mini-transaction is started at the
+	top of each iteration and at func_exit. */
+	while ((index = dict_table_get_next_index(index)) != NULL) {
+		if (error != DB_SUCCESS) {
+			break;
+		}
+
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		if (!row_upd_changes_ord_field_binary(
+			    index, update, thr, old_row, NULL)) {
+			continue;
+		}
+
+		mtr_commit(&mtr);
+
+		entry = row_build_index_entry(old_row, old_ext, index, heap);
+		if (!entry) {
+			ut_ad(0);
+			return(DB_CORRUPTION);
+		}
+
+		mtr_start(&mtr);
+
+		if (ROW_FOUND != row_search_index_entry(
+			    index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
+			ut_ad(0);
+			error = DB_CORRUPTION;
+			break;
+		}
+
+		btr_cur_pessimistic_delete(
+			&error, FALSE, btr_pcur_get_btr_cur(&pcur),
+			BTR_CREATE_FLAG, RB_NONE, &mtr);
+
+		if (error != DB_SUCCESS) {
+			break;
+		}
+
+		mtr_commit(&mtr);
+
+		entry = row_build_index_entry(row, NULL, index, heap);
+		error = row_ins_sec_index_entry_low(
+			BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+			| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG,
+			BTR_MODIFY_TREE, index, offsets_heap, heap,
+			entry, trx_id, thr);
+
+		mtr_start(&mtr);
+	}
+
+func_exit:
+	mtr_commit(&mtr);
+	if (error != DB_SUCCESS) {
+		goto err_exit;
+	}
+
+	return(error);
+}
+
+/******************************************************//**
+Applies an operation to a table that was rebuilt.
+@return NULL on failure (mrec corruption) or when out of data;
+pointer to next record on success */
+static __attribute__((nonnull, warn_unused_result))
+const mrec_t*
+row_log_table_apply_op(
+/*===================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in old index */
+	ulint		new_trx_id_col,	/*!< in: position of
+					DB_TRX_ID in new index */
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting
+					duplicate key errors */
+	dberr_t*	error,		/*!< out: DB_SUCCESS
+					or error code */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	const mrec_t*	mrec,		/*!< in: merge record */
+	const mrec_t*	mrec_end,	/*!< in: end of buffer */
+	ulint*		offsets)	/*!< in/out: work area
+					for parsing mrec */
+{
+	const row_log_t*log	= dup->index->online_log;
+	dict_index_t*	new_index = dict_table_get_first_index(log->table);
+	ulint		extra_size;
+	const mrec_t*	next_mrec;
+	dtuple_t*	old_pk;
+	row_ext_t*	ext;
+	ulint		ext_size;
+
+	ut_ad(dict_index_is_clust(dup->index));
+	ut_ad(dup->index->table != log->table);
+
+	*error = DB_SUCCESS;
+
+	/* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */
+	if (mrec + 3 >= mrec_end) {
+		return(NULL);
+	}
+
+	/* NOTE: a NULL return with *error == DB_SUCCESS means that the
+	record is incomplete in this buffer; the caller will retry with
+	more data. */
+	switch (*mrec++) {
+	default:
+		ut_ad(0);
+		*error = DB_CORRUPTION;
+		return(NULL);
+	case ROW_T_INSERT:
+		/* extra_size uses a 1-or-2 byte variable-length
+		encoding: high bit set means a second byte follows. */
+		extra_size = *mrec++;
+
+		if (extra_size >= 0x80) {
+			/* Read another byte of extra_size. */
+
+			extra_size = (extra_size & 0x7f) << 8;
+			extra_size |= *mrec++;
+		}
+
+		mrec += extra_size;
+
+		if (mrec > mrec_end) {
+			return(NULL);
+		}
+
+		rec_offs_set_n_fields(offsets, dup->index->n_fields);
+		rec_init_offsets_temp(mrec, dup->index, offsets);
+
+		next_mrec = mrec + rec_offs_data_size(offsets);
+
+		if (next_mrec > mrec_end) {
+			return(NULL);
+		} else {
+			ulint		len;
+			const byte*	db_trx_id
+				= rec_get_nth_field(
+					mrec, offsets, trx_id_col, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+			*error = row_log_table_apply_insert(
+				thr, mrec, offsets, offsets_heap,
+				heap, dup, trx_read_trx_id(db_trx_id));
+		}
+		break;
+
+	case ROW_T_DELETE:
+		/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
+		if (mrec + 4 >= mrec_end) {
+			return(NULL);
+		}
+
+		extra_size = *mrec++;
+		ext_size = mach_read_from_2(mrec);
+		mrec += 2;
+		ut_ad(mrec < mrec_end);
+
+		/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
+		For fixed-length PRIMARY key columns, it is 0. */
+		mrec += extra_size;
+
+		rec_offs_set_n_fields(offsets, new_index->n_uniq + 1);
+		rec_init_offsets_temp(mrec, new_index, offsets);
+		next_mrec = mrec + rec_offs_data_size(offsets) + ext_size;
+		if (next_mrec > mrec_end) {
+			return(NULL);
+		}
+
+		/* If there are external fields, retrieve those logged
+		prefix info and reconstruct the row_ext_t */
+		if (ext_size) {
+			/* We use memcpy to avoid unaligned
+			access on some non-x86 platforms.*/
+			ext = static_cast<row_ext_t*>(
+				mem_heap_dup(heap,
+					     mrec + rec_offs_data_size(offsets),
+					     ext_size));
+
+			/* Fix up the internal pointers of the copied
+			row_ext_t: the ext[] and buf areas follow the
+			struct header in the duplicated block. */
+			byte*	ext_start = reinterpret_cast<byte*>(ext);
+
+			ulint	ext_len = sizeof(*ext)
+				+ (ext->n_ext - 1) * sizeof ext->len;
+
+			ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len);
+			ext_len += ext->n_ext * sizeof(*ext->ext);
+
+			ext->buf = static_cast<byte*>(ext_start + ext_len);
+		} else {
+			ext = NULL;
+		}
+
+		*error = row_log_table_apply_delete(
+			thr, new_trx_id_col,
+			mrec, offsets, offsets_heap, heap,
+			log->table, ext);
+		break;
+
+	case ROW_T_UPDATE:
+		/* Logically, the log entry consists of the
+		(PRIMARY KEY,DB_TRX_ID) of the old value (converted
+		to the new primary key definition) followed by
+		the new value in the old table definition. If the
+		definition of the columns belonging to PRIMARY KEY
+		is not changed, the log will only contain
+		DB_TRX_ID,new_row. */
+
+		if (dup->index->online_log->same_pk) {
+			ut_ad(new_index->n_uniq == dup->index->n_uniq);
+
+			extra_size = *mrec++;
+
+			if (extra_size >= 0x80) {
+				/* Read another byte of extra_size. */
+
+				extra_size = (extra_size & 0x7f) << 8;
+				extra_size |= *mrec++;
+			}
+
+			mrec += extra_size;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			rec_offs_set_n_fields(offsets, dup->index->n_fields);
+			rec_init_offsets_temp(mrec, dup->index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+
+			if (next_mrec > mrec_end) {
+				return(NULL);
+			}
+
+			old_pk = dtuple_create(heap, new_index->n_uniq);
+			dict_index_copy_types(
+				old_pk, new_index, old_pk->n_fields);
+
+			/* Copy the PRIMARY KEY fields from mrec to old_pk. */
+			for (ulint i = 0; i < new_index->n_uniq; i++) {
+				const void*	field;
+				ulint		len;
+				dfield_t*	dfield;
+
+				ut_ad(!rec_offs_nth_extern(offsets, i));
+
+				field = rec_get_nth_field(
+					mrec, offsets, i, &len);
+				ut_ad(len != UNIV_SQL_NULL);
+
+				dfield = dtuple_get_nth_field(old_pk, i);
+				dfield_set_data(dfield, field, len);
+			}
+		} else {
+			/* We assume extra_size < 0x100
+			for the PRIMARY KEY prefix. */
+			mrec += *mrec + 1;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			/* Get offsets for PRIMARY KEY,
+			DB_TRX_ID, DB_ROLL_PTR. */
+			rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
+			rec_init_offsets_temp(mrec, new_index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+			if (next_mrec + 2 > mrec_end) {
+				return(NULL);
+			}
+
+			/* Copy the PRIMARY KEY fields and
+			DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */
+			old_pk = dtuple_create(heap, new_index->n_uniq + 2);
+			dict_index_copy_types(old_pk, new_index,
+					      old_pk->n_fields);
+
+			for (ulint i = 0;
+			     i < dict_index_get_n_unique(new_index) + 2;
+			     i++) {
+				const void*	field;
+				ulint		len;
+				dfield_t*	dfield;
+
+				ut_ad(!rec_offs_nth_extern(offsets, i));
+
+				field = rec_get_nth_field(
+					mrec, offsets, i, &len);
+				ut_ad(len != UNIV_SQL_NULL);
+
+				dfield = dtuple_get_nth_field(old_pk, i);
+				dfield_set_data(dfield, field, len);
+			}
+
+			mrec = next_mrec;
+
+			/* Fetch the new value of the row as it was
+			in the old table definition. */
+			extra_size = *mrec++;
+
+			if (extra_size >= 0x80) {
+				/* Read another byte of extra_size. */
+
+				extra_size = (extra_size & 0x7f) << 8;
+				extra_size |= *mrec++;
+			}
+
+			mrec += extra_size;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			rec_offs_set_n_fields(offsets, dup->index->n_fields);
+			rec_init_offsets_temp(mrec, dup->index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+
+			if (next_mrec > mrec_end) {
+				return(NULL);
+			}
+		}
+
+		ut_ad(next_mrec <= mrec_end);
+		dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
+
+		{
+			ulint		len;
+			const byte*	db_trx_id
+				= rec_get_nth_field(
+					mrec, offsets, trx_id_col, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+			*error = row_log_table_apply_update(
+				thr, trx_id_col, new_trx_id_col,
+				mrec, offsets, offsets_heap,
+				heap, dup, trx_read_trx_id(db_trx_id), old_pk);
+		}
+
+		break;
+	}
+
+	/* Both heaps are scratch space per applied record. */
+	mem_heap_empty(offsets_heap);
+	mem_heap_empty(heap);
+	return(next_mrec);
+}
+
+/******************************************************//**
+Applies operations to a table was rebuilt.
+@return DB_SUCCESS, or error code on failure */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_ops(
+/*====================*/
+ que_thr_t* thr, /*!< in: query graph */
+ row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
+ errors */
+{
+ dberr_t error;
+ const mrec_t* mrec = NULL;
+ const mrec_t* next_mrec;
+ const mrec_t* mrec_end = NULL; /* silence bogus warning */
+ const mrec_t* next_mrec_end;
+ mem_heap_t* heap;
+ mem_heap_t* offsets_heap;
+ ulint* offsets;
+ bool has_index_lock;
+ dict_index_t* index = const_cast<dict_index_t*>(
+ dup->index);
+ dict_table_t* new_table = index->online_log->table;
+ dict_index_t* new_index = dict_table_get_first_index(
+ new_table);
+ const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + ut_max(dict_index_get_n_fields(index),
+ dict_index_get_n_unique(new_index) + 2);
+ const ulint trx_id_col = dict_col_get_clust_pos(
+ dict_table_get_sys_col(index->table, DATA_TRX_ID), index);
+ const ulint new_trx_id_col = dict_col_get_clust_pos(
+ dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index);
+ trx_t* trx = thr_get_trx(thr);
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(trx->mysql_thd);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!dict_index_is_online_ddl(new_index));
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+ ut_ad(new_trx_id_col > 0);
+ ut_ad(new_trx_id_col != ULINT_UNDEFINED);
+
+ UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
+
+ offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
+ offsets[0] = i;
+ offsets[1] = dict_index_get_n_fields(index);
+
+ heap = mem_heap_create(UNIV_PAGE_SIZE);
+ offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ has_index_lock = true;
+
+next_block:
+ ut_ad(has_index_lock);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log->head.bytes == 0);
+
+ if (trx_is_interrupted(trx)) {
+ goto interrupted;
+ }
+
+ if (dict_index_is_corrupted(index)) {
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ }
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(index->online_log->head.blocks
+ > index->online_log->tail.blocks)) {
+unexpected_eof:
+ fprintf(stderr, "InnoDB: unexpected end of temporary file"
+ " for table %s\n", index->table_name);
+corruption:
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ if (index->online_log->head.blocks
+ == index->online_log->tail.blocks) {
+ if (index->online_log->head.blocks) {
+#ifdef HAVE_FTRUNCATE
+ /* Truncate the file in order to save space. */
+ ftruncate(index->online_log->fd, 0);
+#endif /* HAVE_FTRUNCATE */
+ index->online_log->head.blocks
+ = index->online_log->tail.blocks = 0;
+ }
+
+ next_mrec = index->online_log->tail.block;
+ next_mrec_end = next_mrec + index->online_log->tail.bytes;
+
+ if (next_mrec_end == next_mrec) {
+ /* End of log reached. */
+all_done:
+ ut_ad(has_index_lock);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ index->online_log->head.bytes = 0;
+ index->online_log->tail.bytes = 0;
+ error = DB_SUCCESS;
+ goto func_exit;
+ }
+ } else {
+ os_offset_t ofs;
+ ibool success;
+
+ ofs = (os_offset_t) index->online_log->head.blocks
+ * srv_sort_buf_size;
+
+ ut_ad(has_index_lock);
+ has_index_lock = false;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ log_free_check();
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ success = os_file_read_no_error_handling(
+ OS_FILE_FROM_FD(index->online_log->fd),
+ index->online_log->head.block, ofs,
+ srv_sort_buf_size);
+
+ if (!success) {
+ fprintf(stderr, "InnoDB: unable to read temporary file"
+ " for table %s\n", index->table_name);
+ goto corruption;
+ }
+
+#ifdef POSIX_FADV_DONTNEED
+ /* Each block is read exactly once. Free up the file cache. */
+ posix_fadvise(index->online_log->fd,
+ ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ /* Try to deallocate the space for the file on disk.
+ This should work on ext4 on Linux 2.6.39 and later,
+ and be ignored when the operation is unsupported. */
+ fallocate(index->online_log->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ ofs, srv_buf_size);
+#endif /* FALLOC_FL_PUNCH_HOLE */
+
+ next_mrec = index->online_log->head.block;
+ next_mrec_end = next_mrec + srv_sort_buf_size;
+ }
+
+ /* This read is not protected by index->online_log->mutex for
+ performance reasons. We will eventually notice any error that
+ was flagged by a DML thread. */
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (mrec) {
+ /* A partial record was read from the previous block.
+ Copy the temporary buffer full, as we do not know the
+ length of the record. Parse subsequent records from
+ the bigger buffer index->online_log->head.block
+ or index->online_log->tail.block. */
+
+ ut_ad(mrec == index->online_log->head.buf);
+ ut_ad(mrec_end > mrec);
+ ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
+
+ memcpy((mrec_t*) mrec_end, next_mrec,
+ (&index->online_log->head.buf)[1] - mrec_end);
+ mrec = row_log_table_apply_op(
+ thr, trx_id_col, new_trx_id_col,
+ dup, &error, offsets_heap, heap,
+ index->online_log->head.buf,
+ (&index->online_log->head.buf)[1], offsets);
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (UNIV_UNLIKELY(mrec == NULL)) {
+ /* The record was not reassembled properly. */
+ goto corruption;
+ }
+ /* The record was previously found out to be
+ truncated. Now that the parse buffer was extended,
+ it should proceed beyond the old end of the buffer. */
+ ut_a(mrec > mrec_end);
+
+ index->online_log->head.bytes = mrec - mrec_end;
+ next_mrec += index->online_log->head.bytes;
+ }
+
+ ut_ad(next_mrec <= next_mrec_end);
+ /* The following loop must not be parsing the temporary
+ buffer, but head.block or tail.block. */
+
+ /* mrec!=NULL means that the next record starts from the
+ middle of the block */
+ ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
+
+#ifdef UNIV_DEBUG
+ if (next_mrec_end == index->online_log->head.block
+ + srv_sort_buf_size) {
+ /* If tail.bytes == 0, next_mrec_end can also be at
+ the end of tail.block. */
+ if (index->online_log->tail.bytes == 0) {
+ ut_ad(next_mrec == next_mrec_end);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes == 0);
+ } else {
+ ut_ad(next_mrec == index->online_log->head.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks
+ > index->online_log->head.blocks);
+ }
+ } else if (next_mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes) {
+ ut_ad(next_mrec == index->online_log->tail.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes
+ <= index->online_log->tail.bytes);
+ } else {
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ mrec_end = next_mrec_end;
+
+ while (!trx_is_interrupted(trx)) {
+ mrec = next_mrec;
+ ut_ad(mrec < mrec_end);
+
+ if (!has_index_lock) {
+ /* We are applying operations from a different
+ block than the one that is being written to.
+ We do not hold index->lock in order to
+ allow other threads to concurrently buffer
+ modifications. */
+ ut_ad(mrec >= index->online_log->head.block);
+ ut_ad(mrec_end == index->online_log->head.block
+ + srv_sort_buf_size);
+ ut_ad(index->online_log->head.bytes
+ < srv_sort_buf_size);
+
+ /* Take the opportunity to do a redo log
+ checkpoint if needed. */
+ log_free_check();
+ } else {
+ /* We are applying operations from the last block.
+ Do not allow other threads to buffer anything,
+ so that we can finally catch up and synchronize. */
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(mrec >= index->online_log->tail.block);
+ }
+
+ /* This read is not protected by index->online_log->mutex
+ for performance reasons. We will eventually notice any
+ error that was flagged by a DML thread. */
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ next_mrec = row_log_table_apply_op(
+ thr, trx_id_col, new_trx_id_col,
+ dup, &error, offsets_heap, heap,
+ mrec, mrec_end, offsets);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (next_mrec == next_mrec_end) {
+ /* The record happened to end on a block boundary.
+ Do we have more blocks left? */
+ if (has_index_lock) {
+ /* The index will be locked while
+ applying the last block. */
+ goto all_done;
+ }
+
+ mrec = NULL;
+process_next_block:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ has_index_lock = true;
+
+ index->online_log->head.bytes = 0;
+ index->online_log->head.blocks++;
+ goto next_block;
+ } else if (next_mrec != NULL) {
+ ut_ad(next_mrec < next_mrec_end);
+ index->online_log->head.bytes += next_mrec - mrec;
+ } else if (has_index_lock) {
+ /* When mrec is within tail.block, it should
+ be a complete record, because we are holding
+ index->lock and thus excluding the writer. */
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(0);
+ goto unexpected_eof;
+ } else {
+ memcpy(index->online_log->head.buf, mrec,
+ mrec_end - mrec);
+ mrec_end += index->online_log->head.buf - mrec;
+ mrec = index->online_log->head.buf;
+ goto process_next_block;
+ }
+ }
+
+interrupted:
+ error = DB_INTERRUPTED;
+func_exit:
+ if (!has_index_lock) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ }
+
+ mem_heap_free(offsets_heap);
+ mem_heap_free(heap);
+ ut_free(offsets);
+ return(error);
+}
+
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+ que_thr_t* thr, /*!< in: query graph */
+ dict_table_t* old_table,
+ /*!< in: old table */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+{
+ dberr_t error;
+ dict_index_t* clust_index;
+
+ thr_get_trx(thr)->error_key_num = 0;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+ clust_index = dict_table_get_first_index(old_table);
+
+ rw_lock_x_lock(dict_index_get_lock(clust_index));
+
+ if (!clust_index->online_log) {
+ ut_ad(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+ /* This function should not be called unless
+ rebuilding a table online. Build in some fault
+ tolerance. */
+ ut_ad(0);
+ error = DB_ERROR;
+ } else {
+ row_merge_dup_t dup = {
+ clust_index, table,
+ clust_index->online_log->col_map, 0
+ };
+
+ error = row_log_table_apply_ops(thr, &dup);
+ }
+
+ rw_lock_x_unlock(dict_index_get_lock(clust_index));
+ return(error);
+}
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@retval true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ dict_table_t* table, /*!< in/out: new table being rebuilt,
+ or NULL when creating a secondary index */
+ bool same_pk,/*!< in: whether the definition of the
+ PRIMARY KEY has remained the same */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map)/*!< in: mapping of old column
+ numbers to new ones, or NULL if !table */
+{
+ byte* buf;
+ row_log_t* log;
+ ulint size;
+
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(dict_index_is_clust(index) == !!table);
+ ut_ad(!table || index->table != table);
+ ut_ad(same_pk || table);
+ ut_ad(!table || col_map);
+ ut_ad(!add_cols || col_map);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ size = 2 * srv_sort_buf_size + sizeof *log;
+ buf = (byte*) os_mem_alloc_large(&size);
+ if (!buf) {
+ return(false);
+ }
+
+ log = (row_log_t*) &buf[2 * srv_sort_buf_size];
+ log->size = size;
+ log->fd = row_merge_file_create_low();
+ if (log->fd < 0) {
+ os_mem_free_large(buf, size);
+ return(false);
+ }
+ mutex_create(index_online_log_key, &log->mutex,
+ SYNC_INDEX_ONLINE_LOG);
+ log->trx_rb = NULL;
+ log->table = table;
+ log->same_pk = same_pk;
+ log->add_cols = add_cols;
+ log->col_map = col_map;
+ log->error = DB_SUCCESS;
+ log->max_trx = 0;
+ log->head.block = buf;
+ log->tail.block = buf + srv_sort_buf_size;
+ log->tail.blocks = log->tail.bytes = 0;
+ log->head.blocks = log->head.bytes = 0;
+ dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
+ index->online_log = log;
+
+ /* While we might be holding an exclusive data dictionary lock
+ here, in row_log_abort_sec() we will not always be holding it. Use
+ atomic operations in both cases. */
+ MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX);
+
+ return(true);
+}
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+ row_log_t*& log) /*!< in,own: row log */
+{
+ MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
+
+ delete log->trx_rb;
+ row_merge_file_destroy_low(log->fd);
+ mutex_free(&log->mutex);
+ os_mem_free_large(log->head.block, log->size);
+ log = 0;
+}
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+ dict_index_t* index) /*!< in: index, must be locked */
+{
+ ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ && mutex_own(&index->online_log->mutex))
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ return(index->online_log->max_trx);
+}
+
+/******************************************************//**
+Applies an operation to a secondary index that was being created. */
+static __attribute__((nonnull))
+void
+row_log_apply_op_low(
+/*=================*/
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup, /*!< in/out: for reporting
+ duplicate key errors */
+ dberr_t* error, /*!< out: DB_SUCCESS or error code */
+ mem_heap_t* offsets_heap, /*!< in/out: memory heap for
+ allocating offsets; can be emptied */
+ bool has_index_lock, /*!< in: true if holding index->lock
+ in exclusive mode */
+ enum row_op op, /*!< in: operation being applied */
+ trx_id_t trx_id, /*!< in: transaction identifier */
+ const dtuple_t* entry) /*!< in: row */
+{
+ mtr_t mtr;
+ btr_cur_t cursor;
+ ulint* offsets = NULL;
+
+ ut_ad(!dict_index_is_clust(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+ == has_index_lock);
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!dict_index_is_corrupted(index));
+ ut_ad(trx_id != 0 || op == ROW_OP_DELETE);
+
+ mtr_start(&mtr);
+
+ /* We perform the pessimistic variant of the operations if we
+ already hold index->lock exclusively. First, search the
+ record. The operation may already have been performed,
+ depending on when the row in the clustered index was
+ scanned. */
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
+ has_index_lock
+ ? BTR_MODIFY_TREE
+ : BTR_MODIFY_LEAF,
+ &cursor, 0, __FILE__, __LINE__,
+ &mtr);
+
+ ut_ad(dict_index_get_n_unique(index) > 0);
+ /* This test is somewhat similar to row_ins_must_modify_rec(),
+ but not identical for unique secondary indexes. */
+ if (cursor.low_match >= dict_index_get_n_unique(index)
+ && !page_rec_is_infimum(btr_cur_get_rec(&cursor))) {
+ /* We have a matching record. */
+ bool exists = (cursor.low_match
+ == dict_index_get_n_fields(index));
+#ifdef UNIV_DEBUG
+ rec_t* rec = btr_cur_get_rec(&cursor);
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
+#endif /* UNIV_DEBUG */
+
+ ut_ad(exists || dict_index_is_unique(index));
+
+ switch (op) {
+ case ROW_OP_DELETE:
+ if (!exists) {
+ /* The record was already deleted. */
+ goto func_exit;
+ }
+
+ if (btr_cur_optimistic_delete(
+ &cursor, BTR_CREATE_FLAG, &mtr)) {
+ *error = DB_SUCCESS;
+ break;
+ }
+
+ if (!has_index_lock) {
+ /* This needs a pessimistic operation.
+ Lock the index tree exclusively. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ __FILE__, __LINE__, &mtr);
+
+ /* No other thread than the current one
+ is allowed to modify the index tree.
+ Thus, the record should still exist. */
+ ut_ad(cursor.low_match
+ >= dict_index_get_n_fields(index));
+ ut_ad(page_rec_is_user_rec(
+ btr_cur_get_rec(&cursor)));
+ }
+
+ /* As there are no externally stored fields in
+ a secondary index record, the parameter
+ rb_ctx = RB_NONE will be ignored. */
+
+ btr_cur_pessimistic_delete(
+ error, FALSE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, &mtr);
+ break;
+ case ROW_OP_INSERT:
+ if (exists) {
+ /* The record already exists. There
+ is nothing to be inserted. */
+ goto func_exit;
+ }
+
+ if (dtuple_contains_null(entry)) {
+ /* The UNIQUE KEY columns match, but
+ there is a NULL value in the key, and
+ NULL!=NULL. */
+ goto insert_the_rec;
+ }
+
+ /* Duplicate key error */
+ ut_ad(dict_index_is_unique(index));
+ row_merge_dup_report(dup, entry->fields);
+ goto func_exit;
+ }
+ } else {
+ switch (op) {
+ rec_t* rec;
+ big_rec_t* big_rec;
+ case ROW_OP_DELETE:
+ /* The record does not exist. */
+ goto func_exit;
+ case ROW_OP_INSERT:
+ if (dict_index_is_unique(index)
+ && (cursor.up_match
+ >= dict_index_get_n_unique(index)
+ || cursor.low_match
+ >= dict_index_get_n_unique(index))
+ && (!index->n_nullable
+ || !dtuple_contains_null(entry))) {
+ /* Duplicate key */
+ row_merge_dup_report(dup, entry->fields);
+ goto func_exit;
+ }
+insert_the_rec:
+ /* Insert the record. As we are inserting into
+ a secondary index, there cannot be externally
+ stored columns (!big_rec). */
+ *error = btr_cur_optimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG,
+ &cursor, &offsets, &offsets_heap,
+ const_cast<dtuple_t*>(entry),
+ &rec, &big_rec, 0, NULL, &mtr);
+ ut_ad(!big_rec);
+ if (*error != DB_FAIL) {
+ break;
+ }
+
+ if (!has_index_lock) {
+ /* This needs a pessimistic operation.
+ Lock the index tree exclusively. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ __FILE__, __LINE__, &mtr);
+ }
+
+ /* We already determined that the
+ record did not exist. No other thread
+ than the current one is allowed to
+ modify the index tree. Thus, the
+ record should still not exist. */
+
+ *error = btr_cur_pessimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG,
+ &cursor, &offsets, &offsets_heap,
+ const_cast<dtuple_t*>(entry),
+ &rec, &big_rec,
+ 0, NULL, &mtr);
+ ut_ad(!big_rec);
+ break;
+ }
+ mem_heap_empty(offsets_heap);
+ }
+
+ if (*error == DB_SUCCESS && trx_id) {
+ page_update_max_trx_id(btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
+
+func_exit:
+ mtr_commit(&mtr);
+}
+
+/******************************************************//**
+Applies an operation to a secondary index that was being created.
+@return NULL on failure (mrec corruption) or when out of data;
+pointer to next record on success */
+static __attribute__((nonnull, warn_unused_result))
+const mrec_t*
+row_log_apply_op(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup, /*!< in/out: for reporting
+ duplicate key errors */
+ dberr_t* error, /*!< out: DB_SUCCESS or error code */
+ mem_heap_t* offsets_heap, /*!< in/out: memory heap for
+ allocating offsets; can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap for
+ allocating data tuples */
+ bool has_index_lock, /*!< in: true if holding index->lock
+ in exclusive mode */
+ const mrec_t* mrec, /*!< in: merge record */
+ const mrec_t* mrec_end, /*!< in: end of buffer */
+ ulint* offsets) /*!< in/out: work area for
+ rec_init_offsets_temp() */
+
+{
+ enum row_op op;
+ ulint extra_size;
+ ulint data_size;
+ ulint n_ext;
+ dtuple_t* entry;
+ trx_id_t trx_id;
+
+ /* Online index creation is only used for secondary indexes. */
+ ut_ad(!dict_index_is_clust(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+ == has_index_lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)) {
+ *error = DB_INDEX_CORRUPT;
+ return(NULL);
+ }
+
+ *error = DB_SUCCESS;
+
+ if (mrec + ROW_LOG_HEADER_SIZE >= mrec_end) {
+ return(NULL);
+ }
+
+ switch (*mrec) {
+ case ROW_OP_INSERT:
+ if (ROW_LOG_HEADER_SIZE + DATA_TRX_ID_LEN + mrec >= mrec_end) {
+ return(NULL);
+ }
+
+ op = static_cast<enum row_op>(*mrec++);
+ trx_id = trx_read_trx_id(mrec);
+ mrec += DATA_TRX_ID_LEN;
+ break;
+ case ROW_OP_DELETE:
+ op = static_cast<enum row_op>(*mrec++);
+ trx_id = 0;
+ break;
+ default:
+corrupted:
+ ut_ad(0);
+ *error = DB_CORRUPTION;
+ return(NULL);
+ }
+
+ extra_size = *mrec++;
+
+ ut_ad(mrec < mrec_end);
+
+ if (extra_size >= 0x80) {
+ /* Read another byte of extra_size. */
+
+ extra_size = (extra_size & 0x7f) << 8;
+ extra_size |= *mrec++;
+ }
+
+ mrec += extra_size;
+
+ if (mrec > mrec_end) {
+ return(NULL);
+ }
+
+ rec_init_offsets_temp(mrec, index, offsets);
+
+ if (rec_offs_any_extern(offsets)) {
+ /* There should never be any externally stored fields
+ in a secondary index, which is what online index
+ creation is used for. Therefore, the log file must be
+ corrupted. */
+ goto corrupted;
+ }
+
+ data_size = rec_offs_data_size(offsets);
+
+ mrec += data_size;
+
+ if (mrec > mrec_end) {
+ return(NULL);
+ }
+
+ entry = row_rec_to_index_entry_low(
+ mrec - data_size, index, offsets, &n_ext, heap);
+ /* Online index creation is only implemented for secondary
+ indexes, which never contain off-page columns. */
+ ut_ad(n_ext == 0);
+#ifdef ROW_LOG_APPLY_PRINT
+ if (row_log_apply_print) {
+ fprintf(stderr, "apply " IB_ID_FMT " " TRX_ID_FMT " %u %u ",
+ index->id, trx_id,
+ unsigned (op), unsigned (has_index_lock));
+ for (const byte* m = mrec - data_size; m < mrec; m++) {
+ fprintf(stderr, "%02x", *m);
+ }
+ putc('\n', stderr);
+ }
+#endif /* ROW_LOG_APPLY_PRINT */
+ row_log_apply_op_low(index, dup, error, offsets_heap,
+ has_index_lock, op, trx_id, entry);
+ return(mrec);
+}
+
+/******************************************************//**
+Applies operations to a secondary index that was being created.
+@return DB_SUCCESS, or error code on failure */
+static __attribute__((nonnull))
+dberr_t
+row_log_apply_ops(
+/*==============*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
+ errors */
+{
+ dberr_t error;
+ const mrec_t* mrec = NULL;
+ const mrec_t* next_mrec;
+ const mrec_t* mrec_end= NULL; /* silence bogus warning */
+ const mrec_t* next_mrec_end;
+ mem_heap_t* offsets_heap;
+ mem_heap_t* heap;
+ ulint* offsets;
+ bool has_index_lock;
+ const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index);
+
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log);
+ UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
+
+ offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
+ offsets[0] = i;
+ offsets[1] = dict_index_get_n_fields(index);
+
+ offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ heap = mem_heap_create(UNIV_PAGE_SIZE);
+ has_index_lock = true;
+
+next_block:
+ ut_ad(has_index_lock);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log->head.bytes == 0);
+
+ if (trx_is_interrupted(trx)) {
+ goto interrupted;
+ }
+
+ if (dict_index_is_corrupted(index)) {
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(index->online_log->head.blocks
+ > index->online_log->tail.blocks)) {
+unexpected_eof:
+ fprintf(stderr, "InnoDB: unexpected end of temporary file"
+ " for index %s\n", index->name + 1);
+corruption:
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ if (index->online_log->head.blocks
+ == index->online_log->tail.blocks) {
+ if (index->online_log->head.blocks) {
+#ifdef HAVE_FTRUNCATE
+ /* Truncate the file in order to save space. */
+ ftruncate(index->online_log->fd, 0);
+#endif /* HAVE_FTRUNCATE */
+ index->online_log->head.blocks
+ = index->online_log->tail.blocks = 0;
+ }
+
+ next_mrec = index->online_log->tail.block;
+ next_mrec_end = next_mrec + index->online_log->tail.bytes;
+
+ if (next_mrec_end == next_mrec) {
+ /* End of log reached. */
+all_done:
+ ut_ad(has_index_lock);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ error = DB_SUCCESS;
+ goto func_exit;
+ }
+ } else {
+ os_offset_t ofs;
+ ibool success;
+
+ ofs = (os_offset_t) index->online_log->head.blocks
+ * srv_sort_buf_size;
+
+ ut_ad(has_index_lock);
+ has_index_lock = false;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ log_free_check();
+
+ success = os_file_read_no_error_handling(
+ OS_FILE_FROM_FD(index->online_log->fd),
+ index->online_log->head.block, ofs,
+ srv_sort_buf_size);
+
+ if (!success) {
+ fprintf(stderr, "InnoDB: unable to read temporary file"
+ " for index %s\n", index->name + 1);
+ goto corruption;
+ }
+
+#ifdef POSIX_FADV_DONTNEED
+ /* Each block is read exactly once. Free up the file cache. */
+ posix_fadvise(index->online_log->fd,
+ ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ /* Try to deallocate the space for the file on disk.
+ This should work on ext4 on Linux 2.6.39 and later,
+ and be ignored when the operation is unsupported. */
+ fallocate(index->online_log->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ ofs, srv_buf_size);
+#endif /* FALLOC_FL_PUNCH_HOLE */
+
+ next_mrec = index->online_log->head.block;
+ next_mrec_end = next_mrec + srv_sort_buf_size;
+ }
+
+ if (mrec) {
+ /* A partial record was read from the previous block.
+ Copy the temporary buffer full, as we do not know the
+ length of the record. Parse subsequent records from
+ the bigger buffer index->online_log->head.block
+ or index->online_log->tail.block. */
+
+ ut_ad(mrec == index->online_log->head.buf);
+ ut_ad(mrec_end > mrec);
+ ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
+
+ memcpy((mrec_t*) mrec_end, next_mrec,
+ (&index->online_log->head.buf)[1] - mrec_end);
+ mrec = row_log_apply_op(
+ index, dup, &error, offsets_heap, heap,
+ has_index_lock, index->online_log->head.buf,
+ (&index->online_log->head.buf)[1], offsets);
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (UNIV_UNLIKELY(mrec == NULL)) {
+ /* The record was not reassembled properly. */
+ goto corruption;
+ }
+ /* The record was previously found out to be
+ truncated. Now that the parse buffer was extended,
+ it should proceed beyond the old end of the buffer. */
+ ut_a(mrec > mrec_end);
+
+ index->online_log->head.bytes = mrec - mrec_end;
+ next_mrec += index->online_log->head.bytes;
+ }
+
+ ut_ad(next_mrec <= next_mrec_end);
+ /* The following loop must not be parsing the temporary
+ buffer, but head.block or tail.block. */
+
+ /* mrec!=NULL means that the next record starts from the
+ middle of the block */
+ ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
+
+#ifdef UNIV_DEBUG
+ if (next_mrec_end == index->online_log->head.block
+ + srv_sort_buf_size) {
+ /* If tail.bytes == 0, next_mrec_end can also be at
+ the end of tail.block. */
+ if (index->online_log->tail.bytes == 0) {
+ ut_ad(next_mrec == next_mrec_end);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes == 0);
+ } else {
+ ut_ad(next_mrec == index->online_log->head.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks
+ > index->online_log->head.blocks);
+ }
+ } else if (next_mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes) {
+ ut_ad(next_mrec == index->online_log->tail.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes
+ <= index->online_log->tail.bytes);
+ } else {
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ mrec_end = next_mrec_end;
+
+ while (!trx_is_interrupted(trx)) {
+ mrec = next_mrec;
+ ut_ad(mrec < mrec_end);
+
+ if (!has_index_lock) {
+ /* We are applying operations from a different
+ block than the one that is being written to.
+ We do not hold index->lock in order to
+ allow other threads to concurrently buffer
+ modifications. */
+ ut_ad(mrec >= index->online_log->head.block);
+ ut_ad(mrec_end == index->online_log->head.block
+ + srv_sort_buf_size);
+ ut_ad(index->online_log->head.bytes
+ < srv_sort_buf_size);
+
+ /* Take the opportunity to do a redo log
+ checkpoint if needed. */
+ log_free_check();
+ } else {
+ /* We are applying operations from the last block.
+ Do not allow other threads to buffer anything,
+ so that we can finally catch up and synchronize. */
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(mrec >= index->online_log->tail.block);
+ }
+
+ next_mrec = row_log_apply_op(
+ index, dup, &error, offsets_heap, heap,
+ has_index_lock, mrec, mrec_end, offsets);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (next_mrec == next_mrec_end) {
+ /* The record happened to end on a block boundary.
+ Do we have more blocks left? */
+ if (has_index_lock) {
+ /* The index will be locked while
+ applying the last block. */
+ goto all_done;
+ }
+
+ mrec = NULL;
+process_next_block:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ has_index_lock = true;
+
+ index->online_log->head.bytes = 0;
+ index->online_log->head.blocks++;
+ goto next_block;
+ } else if (next_mrec != NULL) {
+ ut_ad(next_mrec < next_mrec_end);
+ index->online_log->head.bytes += next_mrec - mrec;
+ } else if (has_index_lock) {
+ /* When mrec is within tail.block, it should
+ be a complete record, because we are holding
+ index->lock and thus excluding the writer. */
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(0);
+ goto unexpected_eof;
+ } else {
+ memcpy(index->online_log->head.buf, mrec,
+ mrec_end - mrec);
+ mrec_end += index->online_log->head.buf - mrec;
+ mrec = index->online_log->head.buf;
+ goto process_next_block;
+ }
+ }
+
+interrupted:
+ error = DB_INTERRUPTED;
+func_exit:
+ if (!has_index_lock) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ }
+
+ switch (error) {
+ case DB_SUCCESS:
+ break;
+ case DB_INDEX_CORRUPT:
+ if (((os_offset_t) index->online_log->tail.blocks + 1)
+ * srv_sort_buf_size >= srv_online_max_size) {
+ /* The log file grew too big. */
+ error = DB_ONLINE_LOG_TOO_BIG;
+ }
+ /* fall through */
+ default:
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ }
+
+ mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
+ ut_free(offsets);
+ return(error);
+}
+
+/******************************************************//**
+Apply the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: secondary index */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+{
+ dberr_t error;
+ row_log_t* log;
+ row_merge_dup_t dup = { index, table, NULL, 0 };
+
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(!dict_index_is_clust(index));
+
+ log_free_check();
+
+ rw_lock_x_lock(dict_index_get_lock(index));
+
+ if (!dict_table_is_corrupted(index->table)) {
+ error = row_log_apply_ops(trx, index, &dup);
+ } else {
+ error = DB_SUCCESS;
+ }
+
+ if (error != DB_SUCCESS || dup.n_dup) {
+ ut_a(!dict_table_is_discarded(index->table));
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ index->table->drop_aborted = TRUE;
+
+ if (error == DB_SUCCESS) {
+ error = DB_DUPLICATE_KEY;
+ }
+
+ dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+ } else {
+ dict_index_set_online_status(index, ONLINE_INDEX_COMPLETE);
+ }
+
+ log = index->online_log;
+ index->online_log = NULL;
+ /* We could remove the TEMP_INDEX_PREFIX and update the data
+ dictionary to say that this index is complete, if we had
+ access to the .frm file here. If the server crashes before
+ all requested indexes have been created, this completed index
+ will be dropped. */
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ row_log_free(log);
+
+ return(error);
+}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index cf662cb1f88..a509e2c5ca8 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,40 +26,18 @@ Completed by Sunny Bains and Marko Makela
#include "row0merge.h"
#include "row0ext.h"
-#include "row0row.h"
-#include "row0upd.h"
+#include "row0log.h"
#include "row0ins.h"
#include "row0sel.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "dict0boot.h"
#include "dict0crea.h"
-#include "dict0load.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "os0file.h"
#include "lock0lock.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "que0que.h"
#include "pars0pars.h"
-#include "mem0mem.h"
-#include "log0log.h"
#include "ut0sort.h"
-#include "handler0alter.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "fts0priv.h"
#include "row0ftsort.h"
+#include "row0import.h"
+#include "handler0alter.h"
+#include "ha_prototypes.h"
/* Ignore posix_fadvise() on those platforms where it does not exist */
#if defined __WIN__
@@ -69,8 +47,6 @@ Completed by Sunny Bains and Marko Makela
#ifdef UNIV_DEBUG
/** Set these in order ot enable debug printout. */
/* @{ */
-/** Log the outcome of each row_merge_cmp() call, comparing records. */
-static ibool row_merge_print_cmp;
/** Log each record read from temporary file. */
static ibool row_merge_print_read;
/** Log each record write to temporary file. */
@@ -86,39 +62,23 @@ static ibool row_merge_print_block_write;
#endif /* UNIV_DEBUG */
/* Whether to disable file system cache */
-UNIV_INTERN char srv_disable_sort_file_cache;
-
-/********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-@return DB_SUCCESS or error number */
-static
-ulint
-row_merge_insert_index_tuples(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- ulint zip_size,/*!< in: compressed page size of
- the old table, or 0 if uncompressed */
- int fd, /*!< in: file descriptor */
- row_merge_block_t* block); /*!< in/out: file buffer */
+UNIV_INTERN char srv_disable_sort_file_cache;
#ifdef UNIV_DEBUG
/******************************************************//**
Display a merge tuple. */
-static
+static __attribute__((nonnull))
void
row_merge_tuple_print(
/*==================*/
FILE* f, /*!< in: output stream */
- const dfield_t* entry, /*!< in: tuple to print */
+ const mtuple_t* entry, /*!< in: tuple to print */
ulint n_fields)/*!< in: number of fields in the tuple */
{
ulint j;
for (j = 0; j < n_fields; j++) {
- const dfield_t* field = &entry[j];
+ const dfield_t* field = &entry->fields[j];
if (dfield_is_null(field)) {
fputs("\n NULL;", f);
@@ -141,16 +101,54 @@ row_merge_tuple_print(
#endif /* UNIV_DEBUG */
/******************************************************//**
+Encode an index record. */
+static __attribute__((nonnull))
+void
+row_merge_buf_encode(
+/*=================*/
+ byte** b, /*!< in/out: pointer to
+ current end of output buffer */
+ const dict_index_t* index, /*!< in: index */
+ const mtuple_t* entry, /*!< in: index fields
+ of the record to encode */
+ ulint n_fields) /*!< in: number of fields
+ in the entry */
+{
+ ulint size;
+ ulint extra_size;
+
+ size = rec_get_converted_size_temp(
+ index, entry->fields, n_fields, &extra_size);
+ ut_ad(size >= extra_size);
+
+ /* Encode extra_size + 1 */
+ if (extra_size + 1 < 0x80) {
+ *(*b)++ = (byte) (extra_size + 1);
+ } else {
+ ut_ad((extra_size + 1) < 0x8000);
+ *(*b)++ = (byte) (0x80 | ((extra_size + 1) >> 8));
+ *(*b)++ = (byte) (extra_size + 1);
+ }
+
+ rec_convert_dtuple_to_temp(*b + extra_size, index,
+ entry->fields, n_fields);
+
+ *b += size;
+}
+
+/******************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
-static
+static __attribute__((malloc, nonnull))
row_merge_buf_t*
row_merge_buf_create_low(
/*=====================*/
mem_heap_t* heap, /*!< in: heap where allocated */
dict_index_t* index, /*!< in: secondary index */
- ulint max_tuples, /*!< in: maximum number of data tuples */
- ulint buf_size) /*!< in: size of the buffer, in bytes */
+ ulint max_tuples, /*!< in: maximum number of
+ data tuples */
+ ulint buf_size) /*!< in: size of the buffer,
+ in bytes */
{
row_merge_buf_t* buf;
@@ -162,7 +160,7 @@ row_merge_buf_create_low(
buf->heap = heap;
buf->index = index;
buf->max_tuples = max_tuples;
- buf->tuples = static_cast<const dfield_t**>(
+ buf->tuples = static_cast<mtuple_t*>(
ut_malloc(2 * max_tuples * sizeof *buf->tuples));
buf->tmp_tuples = buf->tuples + max_tuples;
@@ -204,13 +202,11 @@ row_merge_buf_empty(
/*================*/
row_merge_buf_t* buf) /*!< in,own: sort buffer */
{
- ulint buf_size;
+ ulint buf_size = sizeof *buf;
ulint max_tuples = buf->max_tuples;
mem_heap_t* heap = buf->heap;
dict_index_t* index = buf->index;
- void* tuple = buf->tuples;
-
- buf_size = (sizeof *buf);;
+ mtuple_t* tuples = buf->tuples;
mem_heap_empty(heap);
@@ -218,7 +214,7 @@ row_merge_buf_empty(
buf->heap = heap;
buf->index = index;
buf->max_tuples = max_tuples;
- buf->tuples = static_cast<const dfield_t**>(tuple);
+ buf->tuples = tuples;
buf->tmp_tuples = buf->tuples + max_tuples;
return(buf);
@@ -230,7 +226,7 @@ UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
- row_merge_buf_t* buf) /*!< in,own: sort buffer, to be freed */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
{
ut_free(buf->tuples);
mem_heap_free(buf->heap);
@@ -244,19 +240,18 @@ ulint
row_merge_buf_add(
/*==============*/
row_merge_buf_t* buf, /*!< in/out: sort buffer */
- dict_index_t* fts_index,/*!< fts index to be
- created */
+ dict_index_t* fts_index,/*!< in: fts index to be created */
+ const dict_table_t* old_table,/*!< in: original table */
fts_psort_t* psort_info, /*!< in: parallel sort info */
- const dtuple_t* row, /*!< in: row in clustered index */
+ const dtuple_t* row, /*!< in: table row */
const row_ext_t* ext, /*!< in: cache of externally stored
column prefixes, or NULL */
doc_id_t* doc_id) /*!< in/out: Doc ID if we are
creating FTS index */
-
{
ulint i;
const dict_index_t* index;
- dfield_t* entry;
+ mtuple_t* entry;
dfield_t* field;
const dict_field_t* ifield;
ulint n_fields;
@@ -267,9 +262,13 @@ row_merge_buf_add(
ulint n_row_added = 0;
if (buf->n_tuples >= buf->max_tuples) {
- return(FALSE);
+ return(0);
}
+ DBUG_EXECUTE_IF(
+ "ib_row_merge_buf_add_two",
+ if (buf->n_tuples >= 2) return(0););
+
UNIV_PREFETCH_R(row->fields);
/* If we are building FTS index, buf->index points to
@@ -279,11 +278,9 @@ row_merge_buf_add(
n_fields = dict_index_get_n_fields(index);
- entry = static_cast<dfield_t*>(
- mem_heap_alloc(buf->heap, n_fields * sizeof *entry));
-
- buf->tuples[buf->n_tuples] = entry;
- field = entry;
+ entry = &buf->tuples[buf->n_tuples];
+ field = entry->fields = static_cast<dfield_t*>(
+ mem_heap_alloc(buf->heap, n_fields * sizeof *entry->fields));
data_size = 0;
extra_size = UT_BITS_IN_BYTES(index->n_nullable);
@@ -294,31 +291,15 @@ row_merge_buf_add(
ulint len;
const dict_col_t* col;
ulint col_no;
+ ulint fixed_len;
const dfield_t* row_field;
- ibool col_adjusted;
col = ifield->col;
col_no = dict_col_get_no(col);
- col_adjusted = FALSE;
-
- /* If we are creating a FTS index, a new Doc
- ID column is being added, so we need to adjust
- any column number positioned after this Doc ID */
- if (*doc_id > 0
- && DICT_TF2_FLAG_IS_SET(index->table,
- DICT_TF2_FTS_ADD_DOC_ID)
- && col_no > index->table->fts->doc_col) {
-
- ut_ad(index->table->fts);
-
- col_no--;
- col_adjusted = TRUE;
- }
/* Process the Doc ID column */
if (*doc_id > 0
- && col_no == index->table->fts->doc_col
- && !col_adjusted) {
+ && col_no == index->table->fts->doc_col) {
fts_write_doc_id((byte*) &write_doc_id, *doc_id);
/* Note: field->data now points to a value on the
@@ -435,9 +416,30 @@ row_merge_buf_add(
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
- if (ifield->fixed_len) {
- ut_ad(len == ifield->fixed_len);
+ fixed_len = ifield->fixed_len;
+ if (fixed_len && !dict_table_is_comp(index->table)
+ && DATA_MBMINLEN(col->mbminmaxlen)
+ != DATA_MBMAXLEN(col->mbminmaxlen)) {
+ /* CHAR in ROW_FORMAT=REDUNDANT is always
+ fixed-length, but in the temporary file it is
+ variable-length for variable-length character
+ sets. */
+ fixed_len = 0;
+ }
+
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+			/* len should be between size calculated based on
+			mbmaxlen and mbminlen */
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
@@ -464,12 +466,11 @@ row_merge_buf_add(
ulint size;
ulint extra;
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields, &extra);
+ size = rec_get_converted_size_temp(
+ index, entry->fields, n_fields, &extra);
- ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
- ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+ ut_ad(data_size + extra_size == size);
+ ut_ad(extra_size == extra);
}
#endif /* UNIV_DEBUG */
@@ -479,12 +480,6 @@ row_merge_buf_add(
of extra_size. */
data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
- /* The following assertion may fail if row_merge_block_t is
- declared very small and a PRIMARY KEY is being created with
- many prefix columns. In that case, the record may exceed the
- page_zip_rec_needs_ext() limit. However, no further columns
- will be moved to external storage until the record is inserted
- to the clustered index B-tree. */
ut_ad(data_size < srv_sort_buf_size);
/* Reserve one byte for the end marker of row_merge_block_t. */
@@ -496,7 +491,7 @@ row_merge_buf_add(
buf->n_tuples++;
n_row_added++;
- field = entry;
+ field = entry->fields;
/* Copy the data fields. */
@@ -509,118 +504,120 @@ row_merge_buf_add(
/*************************************************************//**
Report a duplicate key. */
-static
+UNIV_INTERN
void
row_merge_dup_report(
/*=================*/
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
const dfield_t* entry) /*!< in: duplicate index entry */
{
- mrec_buf_t* buf;
- const dtuple_t* tuple;
- dtuple_t tuple_store;
- const rec_t* rec;
- const dict_index_t* index = dup->index;
- ulint n_fields= dict_index_get_n_fields(index);
- mem_heap_t* heap;
- ulint* offsets;
- ulint n_ext;
-
- if (dup->n_dup++) {
+ if (!dup->n_dup++) {
/* Only report the first duplicate record,
but count all duplicate records. */
- return;
+ innobase_fields_to_mysql(dup->table, dup->index, entry);
}
-
- /* Convert the tuple to a record and then to MySQL format. */
- heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
- * sizeof *offsets
- + sizeof *buf);
-
- buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, sizeof *buf));
-
- tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
- n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
-
- rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- innobase_rec_to_mysql(dup->table, rec, index, offsets);
-
- mem_heap_free(heap);
}
/*************************************************************//**
Compare two tuples.
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-static
+static __attribute__((warn_unused_result))
int
row_merge_tuple_cmp(
/*================*/
+ ulint n_uniq, /*!< in: number of unique fields */
ulint n_field,/*!< in: number of fields */
- const dfield_t* a, /*!< in: first tuple to be compared */
- const dfield_t* b, /*!< in: second tuple to be compared */
- row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
+ const mtuple_t& a, /*!< in: first tuple to be compared */
+ const mtuple_t& b, /*!< in: second tuple to be compared */
+ row_merge_dup_t* dup) /*!< in/out: for reporting duplicates,
+ NULL if non-unique index */
{
int cmp;
- const dfield_t* field = a;
+ const dfield_t* af = a.fields;
+ const dfield_t* bf = b.fields;
+ ulint n = n_uniq;
+
+ ut_ad(n_uniq > 0);
+ ut_ad(n_uniq <= n_field);
/* Compare the fields of the tuples until a difference is
found or we run out of fields to compare. If !cmp at the
end, the tuples are equal. */
do {
- cmp = cmp_dfield_dfield(a++, b++);
- } while (!cmp && --n_field);
+ cmp = cmp_dfield_dfield(af++, bf++);
+ } while (!cmp && --n);
+
+ if (cmp) {
+ return(cmp);
+ }
- if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
+ if (dup) {
/* Report a duplicate value error if the tuples are
logically equal. NULL columns are logically inequal,
although they are equal in the sorting order. Find
out if any of the fields are NULL. */
- for (b = field; b != a; b++) {
- if (dfield_is_null(b)) {
-
- goto func_exit;
+ for (const dfield_t* df = a.fields; df != af; df++) {
+ if (dfield_is_null(df)) {
+ goto no_report;
}
}
- row_merge_dup_report(dup, field);
+ row_merge_dup_report(dup, a.fields);
}
-func_exit:
+no_report:
+ /* The n_uniq fields were equal, but we compare all fields so
+ that we will get the same (internal) order as in the B-tree. */
+ for (n = n_field - n_uniq + 1; --n; ) {
+ cmp = cmp_dfield_dfield(af++, bf++);
+ if (cmp) {
+ return(cmp);
+ }
+ }
+
+ /* This should never be reached, except in a secondary index
+ when creating a secondary index and a PRIMARY KEY, and there
+ is a duplicate in the PRIMARY KEY that has not been detected
+ yet. Internally, an index must never contain duplicates. */
return(cmp);
}
/** Wrapper for row_merge_tuple_sort() to inject some more context to
UT_SORT_FUNCTION_BODY().
-@param a array of tuples that being sorted
-@param b aux (work area), same size as tuples[]
-@param c lower bound of the sorting area, inclusive
-@param d upper bound of the sorting area, inclusive */
-#define row_merge_tuple_sort_ctx(a,b,c,d) \
- row_merge_tuple_sort(n_field, dup, a, b, c, d)
+@param tuples array of tuples that being sorted
+@param aux work area, same size as tuples[]
+@param low lower bound of the sorting area, inclusive
+@param high upper bound of the sorting area, inclusive */
+#define row_merge_tuple_sort_ctx(tuples, aux, low, high) \
+ row_merge_tuple_sort(n_uniq, n_field, dup, tuples, aux, low, high)
/** Wrapper for row_merge_tuple_cmp() to inject some more context to
UT_SORT_FUNCTION_BODY().
@param a first tuple to be compared
@param b second tuple to be compared
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
+#define row_merge_tuple_cmp_ctx(a,b) \
+ row_merge_tuple_cmp(n_uniq, n_field, a, b, dup)
/**********************************************************************//**
Merge sort the tuple buffer in main memory. */
-static
+static __attribute__((nonnull(4,5)))
void
row_merge_tuple_sort(
/*=================*/
+ ulint n_uniq, /*!< in: number of unique fields */
ulint n_field,/*!< in: number of fields */
- row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
- const dfield_t** tuples, /*!< in/out: tuples */
- const dfield_t** aux, /*!< in/out: work area */
+ row_merge_dup_t* dup, /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
+ mtuple_t* tuples, /*!< in/out: tuples */
+ mtuple_t* aux, /*!< in/out: work area */
ulint low, /*!< in: lower bound of the
sorting area, inclusive */
ulint high) /*!< in: upper bound of the
sorting area, exclusive */
{
+ ut_ad(n_field > 0);
+ ut_ad(n_uniq <= n_field);
+
UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
tuples, aux, low, high, row_merge_tuple_cmp_ctx);
}
@@ -632,9 +629,12 @@ void
row_merge_buf_sort(
/*===============*/
row_merge_buf_t* buf, /*!< in/out: sort buffer */
- row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
+ row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
{
- row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
+ row_merge_tuple_sort(dict_index_get_n_unique(buf->index),
+ dict_index_get_n_fields(buf->index),
+ dup,
buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
}
@@ -653,39 +653,11 @@ row_merge_buf_write(
ulint n_fields= dict_index_get_n_fields(index);
byte* b = &block[0];
- ulint i;
-
- for (i = 0; i < buf->n_tuples; i++) {
- ulint size;
- ulint extra_size;
- const dfield_t* entry = buf->tuples[i];
-
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields,
- &extra_size);
- ut_ad(size >= extra_size);
- ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
- extra_size -= REC_N_NEW_EXTRA_BYTES;
- size -= REC_N_NEW_EXTRA_BYTES;
-
- /* Encode extra_size + 1 */
- if (extra_size + 1 < 0x80) {
- *b++ = (byte) (extra_size + 1);
- } else {
- ut_ad((extra_size + 1) < 0x8000);
- *b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
- *b++ = (byte) (extra_size + 1);
- }
-
- ut_ad(b + size < &block[srv_sort_buf_size]);
-
- rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
- REC_STATUS_ORDINARY,
- entry, n_fields);
-
- b += size;
+ for (ulint i = 0; i < buf->n_tuples; i++) {
+ const mtuple_t* entry = &buf->tuples[i];
+ row_merge_buf_encode(&b, index, entry, n_fields);
+ ut_ad(b < &block[srv_sort_buf_size]);
#ifdef UNIV_DEBUG
if (row_merge_print_write) {
fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
@@ -744,36 +716,6 @@ row_merge_heap_create(
return(heap);
}
-/**********************************************************************//**
-Search an index object by name and column names. If several indexes match,
-return the index with the max id.
-@return matching index, NULL if not found */
-static
-dict_index_t*
-row_merge_dict_table_get_index(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const merge_index_def_t*index_def) /*!< in: index definition */
-{
- ulint i;
- dict_index_t* index;
- const char** column_names;
-
- column_names = static_cast<const char**>(
- mem_alloc(index_def->n_fields * sizeof *column_names));
-
- for (i = 0; i < index_def->n_fields; ++i) {
- column_names[i] = index_def->fields[i].field_name;
- }
-
- index = dict_table_get_index_by_max_id(
- table, index_def->name, column_names, index_def->n_fields);
-
- mem_free((void*) column_names);
-
- return(index);
-}
-
/********************************************************************//**
Read a merge block from the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -790,6 +732,8 @@ row_merge_read(
os_offset_t ofs = ((os_offset_t) offset) * srv_sort_buf_size;
ibool success;
+ DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
+
#ifdef UNIV_DEBUG
if (row_merge_print_block_read) {
fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
@@ -837,6 +781,8 @@ row_merge_write(
os_offset_t ofs = buf_len * (os_offset_t) offset;
ibool ret;
+ DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
+
ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, ofs, buf_len);
#ifdef UNIV_DEBUG
@@ -858,7 +804,7 @@ row_merge_write(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN __attribute__((nonnull))
+UNIV_INTERN
const byte*
row_merge_read_rec(
/*===============*/
@@ -934,7 +880,7 @@ err_exit:
case. */
avail_size = &block[srv_sort_buf_size] - b;
-
+ ut_ad(avail_size < sizeof *buf);
memcpy(*buf, b, avail_size);
if (!row_merge_read(fd, ++(*foffs), block)) {
@@ -951,7 +897,7 @@ err_exit:
*mrec = *buf + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
@@ -970,7 +916,7 @@ err_exit:
*mrec = b + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
ut_ad(extra_size + data_size < sizeof *buf);
@@ -1174,46 +1120,12 @@ row_merge_write_eof(
return(&block[0]);
}
-/*************************************************************//**
-Compare two merge records.
-@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
-UNIV_INTERN
-int
-row_merge_cmp(
-/*==========*/
- const mrec_t* mrec1, /*!< in: first merge
- record to be compared */
- const mrec_t* mrec2, /*!< in: second merge
- record to be compared */
- const ulint* offsets1, /*!< in: first record offsets */
- const ulint* offsets2, /*!< in: second record offsets */
- const dict_index_t* index, /*!< in: index */
- ibool* null_eq) /*!< out: set to TRUE if
- found matching null values */
-{
- int cmp;
-
- cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
- null_eq);
-
-#ifdef UNIV_DEBUG
- if (row_merge_print_cmp) {
- fputs("row_merge_cmp1 ", stderr);
- rec_print_comp(stderr, mrec1, offsets1);
- fputs("\nrow_merge_cmp2 ", stderr);
- rec_print_comp(stderr, mrec2, offsets2);
- fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
- }
-#endif /* UNIV_DEBUG */
-
- return(cmp);
-}
/********************************************************************//**
Reads clustered index of the table and create temporary files
containing the index entries for the indexes to be built.
@return DB_SUCCESS or error */
-static __attribute__((nonnull))
-ulint
+static __attribute__((nonnull(1,2,3,4,6,9,10,16), warn_unused_result))
+dberr_t
row_merge_read_clustered_index(
/*===========================*/
trx_t* trx, /*!< in: transaction */
@@ -1224,23 +1136,40 @@ row_merge_read_clustered_index(
const dict_table_t* new_table,/*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** index, /*!< in: indexes to be created */
dict_index_t* fts_sort_idx,
- /*!< in: indexes to be created */
- fts_psort_t* psort_info, /*!< in: parallel sort info */
+ /*!< in: full-text index to be created,
+ or NULL */
+ fts_psort_t* psort_info,
+ /*!< in: parallel sort info for
+ fts_sort_idx creation, or NULL */
merge_file_t* files, /*!< in: temporary files */
+ const ulint* key_numbers,
+ /*!< in: MySQL key numbers to create */
ulint n_index,/*!< in: number of indexes to create */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc,
+ /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence,/*!< in/out: autoinc sequence */
row_merge_block_t* block) /*!< in/out: file buffer */
{
dict_index_t* clust_index; /* Clustered index */
mem_heap_t* row_heap; /* Heap memory to create
- clustered index records */
+ clustered index tuples */
row_merge_buf_t** merge_buf; /* Temporary list for records*/
- btr_pcur_t pcur; /* Persistent cursor on the
- clustered index */
+ btr_pcur_t pcur; /* Cursor on the clustered
+ index */
mtr_t mtr; /* Mini transaction */
- ulint err = DB_SUCCESS;/* Return code */
- ulint i;
+ dberr_t err = DB_SUCCESS;/* Return code */
ulint n_nonnull = 0; /* number of columns
changed to NOT NULL */
ulint* nonnull = NULL; /* NOT NULL columns */
@@ -1252,13 +1181,10 @@ row_merge_read_clustered_index(
ibool fts_pll_sort = FALSE;
ib_int64_t sig_count = 0;
- trx->op_info = "reading clustered index";
+ ut_ad((old_table == new_table) == !col_map);
+ ut_ad(!add_cols || col_map);
- ut_ad(trx);
- ut_ad(old_table);
- ut_ad(new_table);
- ut_ad(index);
- ut_ad(files);
+ trx->op_info = "reading clustered index";
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n");
@@ -1269,8 +1195,7 @@ row_merge_read_clustered_index(
merge_buf = static_cast<row_merge_buf_t**>(
mem_alloc(n_index * sizeof *merge_buf));
-
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
if (index[i]->type & DICT_FTS) {
/* We are building a FT index, make sure
@@ -1282,14 +1207,14 @@ row_merge_read_clustered_index(
merge_buf[i] = row_merge_buf_create(fts_sort_idx);
add_doc_id = DICT_TF2_FLAG_IS_SET(
- old_table, DICT_TF2_FTS_ADD_DOC_ID);
+ new_table, DICT_TF2_FTS_ADD_DOC_ID);
/* If Doc ID does not exist in the table itself,
fetch the first FTS Doc ID */
if (add_doc_id) {
fts_get_next_doc_id(
(dict_table_t*) new_table,
- &doc_id);
+ &doc_id);
ut_ad(doc_id > 0);
}
@@ -1310,35 +1235,34 @@ row_merge_read_clustered_index(
clust_index = dict_table_get_first_index(old_table);
btr_pcur_open_at_index_side(
- TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- if (UNIV_UNLIKELY(old_table != new_table)) {
- ulint n_cols = dict_table_get_n_cols(old_table);
+ true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- /* A primary key will be created. Identify the
- columns that were flagged NOT NULL in the new table,
- so that we can quickly check that the records in the
- (old) clustered index do not violate the added NOT
- NULL constraints. */
-
- if (!fts_sort_idx) {
- ut_a(n_cols == dict_table_get_n_cols(new_table));
- }
+ if (old_table != new_table) {
+ /* The table is being rebuilt. Identify the columns
+ that were flagged NOT NULL in the new table, so that
+ we can quickly check that the records in the old table
+ do not violate the added NOT NULL constraints. */
nonnull = static_cast<ulint*>(
- mem_alloc(n_cols * sizeof *nonnull));
+ mem_alloc(dict_table_get_n_cols(new_table)
+ * sizeof *nonnull));
- for (i = 0; i < n_cols; i++) {
+ for (ulint i = 0; i < dict_table_get_n_cols(old_table); i++) {
if (dict_table_get_nth_col(old_table, i)->prtype
& DATA_NOT_NULL) {
+ continue;
+ }
+
+ const ulint j = col_map[i];
+ if (j == ULINT_UNDEFINED) {
+ /* The column was dropped. */
continue;
}
- if (dict_table_get_nth_col(new_table, i)->prtype
+ if (dict_table_get_nth_col(new_table, j)->prtype
& DATA_NOT_NULL) {
-
- nonnull[n_nonnull++] = i;
+ nonnull[n_nonnull++] = j;
}
}
@@ -1354,81 +1278,221 @@ row_merge_read_clustered_index(
for (;;) {
const rec_t* rec;
ulint* offsets;
- dtuple_t* row = NULL;
+ const dtuple_t* row;
row_ext_t* ext;
- ibool has_next = TRUE;
-
- btr_pcur_move_to_next_on_page(&pcur);
+ page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
- /* When switching pages, commit the mini-transaction
- in order to release the latch on the old page. */
+ page_cur_move_to_next(cur);
- if (btr_pcur_is_after_last_on_page(&pcur)) {
+ if (page_cur_is_after_last(cur)) {
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
err = DB_INTERRUPTED;
trx->error_key_num = 0;
goto func_exit;
}
- /* Store the cursor position on the last user
- record on the page. */
- btr_pcur_move_to_prev_on_page(&pcur);
- /* Leaf pages must never be empty, unless
- this is the only page in the index tree. */
- ut_ad(btr_pcur_is_on_user_rec(&pcur)
- || buf_block_get_page_no(
- btr_pcur_get_block(&pcur))
- == clust_index->page);
-
- btr_pcur_store_position(&pcur, &mtr);
- mtr_commit(&mtr);
- mtr_start(&mtr);
- /* Restore position on the record, or its
- predecessor if the record was purged
- meanwhile. */
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &pcur, &mtr);
- /* Move to the successor of the original record. */
- has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ if (online && old_table != new_table) {
+ err = row_log_table_get_error(clust_index);
+ if (err != DB_SUCCESS) {
+ trx->error_key_num = 0;
+ goto func_exit;
+ }
+ }
+#ifdef DBUG_OFF
+# define dbug_run_purge false
+#else /* DBUG_OFF */
+ bool dbug_run_purge = false;
+#endif /* DBUG_OFF */
+ DBUG_EXECUTE_IF(
+ "ib_purge_on_create_index_page_switch",
+ dbug_run_purge = true;);
+
+ if (dbug_run_purge
+ || rw_lock_get_waiters(
+ dict_index_get_lock(clust_index))) {
+ /* There are waiters on the clustered
+ index tree lock, likely the purge
+ thread. Store and restore the cursor
+ position, and yield so that scanning a
+ large table will not starve other
+ threads. */
+
+ /* Store the cursor position on the last user
+ record on the page. */
+ btr_pcur_move_to_prev_on_page(&pcur);
+ /* Leaf pages must never be empty, unless
+ this is the only page in the index tree. */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur)
+ || buf_block_get_page_no(
+ btr_pcur_get_block(&pcur))
+ == clust_index->page);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ if (dbug_run_purge) {
+ /* This is for testing
+ purposes only (see
+ DBUG_EXECUTE_IF above). We
+ signal the purge thread and
+ hope that the purge batch will
+ complete before we execute
+ btr_pcur_restore_position(). */
+ trx_purge_run();
+ os_thread_sleep(1000000);
+ }
+
+ /* Give the waiters a chance to proceed. */
+ os_thread_yield();
+
+ mtr_start(&mtr);
+ /* Restore position on the record, or its
+ predecessor if the record was purged
+ meanwhile. */
+ btr_pcur_restore_position(
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ /* Move to the successor of the
+ original record. */
+ if (!btr_pcur_move_to_next_user_rec(
+ &pcur, &mtr)) {
+end_of_index:
+ row = NULL;
+ mtr_commit(&mtr);
+ mem_heap_free(row_heap);
+ if (nonnull) {
+ mem_free(nonnull);
+ }
+ goto write_buffers;
+ }
+ } else {
+ ulint next_page_no;
+ buf_block_t* block;
+
+ next_page_no = btr_page_get_next(
+ page_cur_get_page(cur), &mtr);
+
+ if (next_page_no == FIL_NULL) {
+ goto end_of_index;
+ }
+
+ block = page_cur_get_block(cur);
+ block = btr_block_get(
+ buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ next_page_no, BTR_SEARCH_LEAF,
+ clust_index, &mtr);
+
+ btr_leaf_page_release(page_cur_get_block(cur),
+ BTR_SEARCH_LEAF, &mtr);
+ page_cur_set_before_first(block, cur);
+ page_cur_move_to_next(cur);
+
+ ut_ad(!page_cur_is_after_last(cur));
+ }
}
- if (UNIV_LIKELY(has_next)) {
- rec = btr_pcur_get_rec(&pcur);
- offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &row_heap);
+ rec = page_cur_get_rec(cur);
+
+ offsets = rec_get_offsets(rec, clust_index, NULL,
+ ULINT_UNDEFINED, &row_heap);
+
+ if (online && new_table != old_table) {
+ /* When rebuilding the table online, perform a
+ REPEATABLE READ, so that row_log_table_apply()
+ will not see a newer state of the table when
+ applying the log. This is mainly to prevent
+ false duplicate key errors, because the log
+ will identify records by the PRIMARY KEY. */
+ ut_ad(trx->read_view);
+
+ if (!read_view_sees_trx_id(
+ trx->read_view,
+ row_get_rec_trx_id(
+ rec, clust_index, offsets))) {
+ rec_t* old_vers;
+
+ row_vers_build_for_consistent_read(
+ rec, &mtr, clust_index, &offsets,
+ trx->read_view, &row_heap,
+ row_heap, &old_vers);
+
+ rec = old_vers;
+
+ if (!rec) {
+ continue;
+ }
+ }
- /* Skip delete marked records. */
if (rec_get_deleted_flag(
- rec, dict_table_is_comp(old_table))) {
+ rec,
+ dict_table_is_comp(old_table))) {
+ /* This record was deleted in the latest
+ committed version, or it was deleted and
+ then reinserted-by-update before purge
+ kicked in. Skip it. */
continue;
}
- srv_n_rows_inserted++;
+ ut_ad(!rec_offs_any_null_extern(rec, offsets));
+ } else if (rec_get_deleted_flag(
+ rec, dict_table_is_comp(old_table))) {
+ /* Skip delete-marked records.
+
+ Skipping delete-marked records will make the
+			created indexes unusable for transactions
+ whose read views were created before the index
+ creation completed, but preserving the history
+ would make it tricky to detect duplicate
+ keys. */
+ continue;
+ } else if (UNIV_LIKELY_NULL(rec_offs_any_null_extern(
+ rec, offsets))) {
+ /* This is essentially a READ UNCOMMITTED to
+ fetch the most recent version of the record. */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ trx_id_t trx_id;
+ ulint trx_id_offset;
+
+ /* It is possible that the record was
+ just inserted and the off-page columns
+ have not yet been written. We will
+ ignore the record if this is the case,
+ because it should be covered by the
+ index->info.online log in that case. */
+
+ trx_id_offset = clust_index->trx_id_offset;
+ if (!trx_id_offset) {
+ trx_id_offset = row_get_trx_id_offset(
+ clust_index, offsets);
+ }
- /* Build a row based on the clustered index. */
+ trx_id = trx_read_trx_id(rec + trx_id_offset);
+ ut_a(trx_rw_is_active(trx_id, NULL));
+ ut_a(trx_undo_trx_id_is_insert(rec + trx_id_offset));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, offsets,
- new_table, &ext, row_heap);
+ /* When !online, we are holding an X-lock on
+ old_table, preventing any inserts. */
+ ut_ad(online);
+ continue;
+ }
- if (UNIV_LIKELY_NULL(nonnull)) {
- for (i = 0; i < n_nonnull; i++) {
- dfield_t* field
- = &row->fields[nonnull[i]];
- dtype_t* field_type
- = dfield_get_type(field);
+ /* Build a row based on the clustered index. */
- ut_a(!(field_type->prtype
- & DATA_NOT_NULL));
+ row = row_build(ROW_COPY_POINTERS, clust_index,
+ rec, offsets, new_table,
+ add_cols, col_map, &ext, row_heap);
+ ut_ad(row);
- if (dfield_is_null(field)) {
- err = DB_PRIMARY_KEY_IS_NULL;
- trx->error_key_num = 0;
- goto func_exit;
- }
+ for (ulint i = 0; i < n_nonnull; i++) {
+ const dfield_t* field = &row->fields[nonnull[i]];
- field_type->prtype |= DATA_NOT_NULL;
- }
+ ut_ad(dfield_get_type(field)->prtype & DATA_NOT_NULL);
+
+ if (dfield_is_null(field)) {
+ err = DB_INVALID_NULL;
+ trx->error_key_num = 0;
+ goto func_exit;
}
}
@@ -1439,19 +1503,72 @@ row_merge_read_clustered_index(
doc_id = 0;
}
+ if (add_autoinc != ULINT_UNDEFINED) {
+
+ ut_ad(add_autoinc
+ < dict_table_get_n_user_cols(new_table));
+
+ const dfield_t* dfield;
+
+ dfield = dtuple_get_nth_field(row, add_autoinc);
+ if (dfield_is_null(dfield)) {
+ goto write_buffers;
+ }
+
+ const dtype_t* dtype = dfield_get_type(dfield);
+ byte* b = static_cast<byte*>(dfield_get_data(dfield));
+
+ if (sequence.eof()) {
+ err = DB_ERROR;
+ trx->error_key_num = 0;
+
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_AUTOINC_READ_FAILED, "[NULL]");
+
+ goto func_exit;
+ }
+
+ ulonglong value = sequence++;
+
+ switch (dtype_get_mtype(dtype)) {
+ case DATA_INT: {
+ ibool usign;
+ ulint len = dfield_get_len(dfield);
+
+ usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+ mach_write_ulonglong(b, value, len, usign);
+
+ break;
+ }
+
+ case DATA_FLOAT:
+ mach_float_write(
+ b, static_cast<float>(value));
+ break;
+
+ case DATA_DOUBLE:
+ mach_double_write(
+ b, static_cast<double>(value));
+ break;
+
+ default:
+ ut_ad(0);
+ }
+ }
+
+write_buffers:
/* Build all entries for all the indexes to be created
in a single scan of the clustered index. */
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
row_merge_buf_t* buf = merge_buf[i];
merge_file_t* file = &files[i];
- const dict_index_t* index = buf->index;
ulint rows_added = 0;
if (UNIV_LIKELY
(row && (rows_added = row_merge_buf_add(
- buf, fts_index, psort_info,
- row, ext, &doc_id)))) {
+ buf, fts_index, old_table,
+ psort_info, row, ext, &doc_id)))) {
/* If we are creating FTS index,
a single row can generate more
@@ -1464,35 +1581,60 @@ row_merge_read_clustered_index(
continue;
}
- if ((!row || !doc_id)
- && index->type & DICT_FTS) {
+ if ((buf->index->type & DICT_FTS)
+ && (!row || !doc_id)) {
continue;
}
/* The buffer must be sufficiently large
- to hold at least one record. */
- ut_ad(buf->n_tuples || !has_next);
+ to hold at least one record. It may only
+ be empty when we reach the end of the
+ clustered index. row_merge_buf_add()
+ must not have been called in this loop. */
+ ut_ad(buf->n_tuples || row == NULL);
/* We have enough data tuples to form a block.
Sort them and write to disk. */
if (buf->n_tuples) {
- if (dict_index_is_unique(index)) {
- row_merge_dup_t dup;
- dup.index = buf->index;
- dup.table = table;
- dup.n_dup = 0;
+ if (dict_index_is_unique(buf->index)) {
+ row_merge_dup_t dup = {
+ buf->index, table, col_map, 0};
row_merge_buf_sort(buf, &dup);
if (dup.n_dup) {
err = DB_DUPLICATE_KEY;
- trx->error_key_num = i;
- goto func_exit;
+ trx->error_key_num
+ = key_numbers[i];
+ break;
}
} else {
row_merge_buf_sort(buf, NULL);
}
+ } else if (online && new_table == old_table) {
+ /* Note the newest transaction that
+ modified this index when the scan was
+ completed. We prevent older readers
+ from accessing this index, to ensure
+ read consistency. */
+
+ trx_id_t max_trx_id;
+
+ ut_a(row == NULL);
+ rw_lock_x_lock(
+ dict_index_get_lock(buf->index));
+ ut_a(dict_index_get_online_status(buf->index)
+ == ONLINE_INDEX_CREATION);
+
+ max_trx_id = row_log_get_max_trx(buf->index);
+
+ if (max_trx_id > buf->index->trx_id) {
+ buf->index->trx_id = max_trx_id;
+ }
+
+ rw_lock_x_unlock(
+ dict_index_get_lock(buf->index));
}
row_merge_buf_write(buf, file, block);
@@ -1501,7 +1643,7 @@ row_merge_read_clustered_index(
block)) {
err = DB_OUT_OF_FILE_SPACE;
trx->error_key_num = i;
- goto func_exit;
+ break;
}
UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
@@ -1514,14 +1656,11 @@ row_merge_read_clustered_index(
if (UNIV_UNLIKELY
(!(rows_added = row_merge_buf_add(
- buf, fts_index, psort_info, row,
- ext, &doc_id)))) {
+ buf, fts_index, old_table,
+ psort_info, row, ext,
+ &doc_id)))) {
/* An empty buffer should have enough
- room for at least one record.
- TODO: for FTS index building, we'll
- need to prepared for coping with very
- large text/blob data in a single row
- that could fill up the merge file */
+ room for at least one record. */
ut_error;
}
@@ -1529,27 +1668,40 @@ row_merge_read_clustered_index(
}
}
- mem_heap_empty(row_heap);
+ if (row == NULL) {
+ goto all_done;
+ }
- if (UNIV_UNLIKELY(!has_next)) {
+ if (err != DB_SUCCESS) {
goto func_exit;
}
+
+ mem_heap_empty(row_heap);
}
func_exit:
+ mtr_commit(&mtr);
+ mem_heap_free(row_heap);
+
+ if (nonnull) {
+ mem_free(nonnull);
+ }
+
+all_done:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
#endif
if (fts_pll_sort) {
- for (i = 0; i < fts_sort_pll_degree; i++) {
+ for (ulint i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].state = FTS_PARENT_COMPLETE;
}
wait_again:
os_event_wait_time_low(fts_parallel_sort_event,
1000000, sig_count);
- for (i = 0; i < fts_sort_pll_degree; i++) {
- if (psort_info[i].child_status != FTS_CHILD_COMPLETE) {
+ for (ulint i = 0; i < fts_sort_pll_degree; i++) {
+ if (psort_info[i].child_status != FTS_CHILD_COMPLETE
+ && psort_info[i].child_status != FTS_CHILD_EXITING) {
sig_count = os_event_reset(
fts_parallel_sort_event);
goto wait_again;
@@ -1560,17 +1712,7 @@ wait_again:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Tokenization\n");
#endif
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(row_heap);
-
- if (UNIV_LIKELY_NULL(nonnull)) {
- mem_free(nonnull);
- }
-
-
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
row_merge_buf_free(merge_buf[i]);
}
@@ -1578,10 +1720,13 @@ wait_again:
mem_free(merge_buf);
+ btr_pcur_close(&pcur);
+
/* Update the next Doc ID we used. Table should be locked, so
no concurrent DML */
if (max_doc_id) {
- fts_update_next_doc_id(new_table, old_table->name, max_doc_id);
+ fts_update_next_doc_id(
+ 0, new_table, old_table->name, max_doc_id);
}
trx->op_info = "";
@@ -1590,24 +1735,20 @@ wait_again:
}
/** Write a record via buffer 2 and read the next record to buffer N.
-@param M FTS merge info structure
-@param N index into array of merge info structure
-@param INDEX the FTS index */
-
-
-/** Write a record via buffer 2 and read the next record to buffer N.
@param N number of the buffer (0 or 1)
+@param INDEX record descriptor
@param AT_END statement to execute at end of input */
-#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END) \
+#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
do { \
- b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], &buf[2], b2, \
+ b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], \
+ &buf[2], b2, \
of->fd, &of->offset, \
mrec##N, offsets##N); \
if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \
goto corrupt; \
} \
- b##N = row_merge_read_rec(&block[N * srv_sort_buf_size], &buf[N], \
- b##N, index, \
+ b##N = row_merge_read_rec(&block[N * srv_sort_buf_size],\
+ &buf[N], b##N, INDEX, \
file->fd, foffs##N, \
&mrec##N, offsets##N); \
if (UNIV_UNLIKELY(!b##N)) { \
@@ -1621,11 +1762,12 @@ wait_again:
/*************************************************************//**
Merge two blocks of records on disk and write a bigger block.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_blocks(
/*=============*/
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
const merge_file_t* file, /*!< in: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
@@ -1633,20 +1775,18 @@ row_merge_blocks(
source list in the file */
ulint* foffs1, /*!< in/out: offset of second
source list in the file */
- merge_file_t* of, /*!< in/out: output file */
- struct TABLE* table) /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ merge_file_t* of) /*!< in/out: output file */
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
mrec_buf_t* buf; /*!< buffer for handling
split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
- const byte* b1; /*!< pointer to block[1] */
- byte* b2; /*!< pointer to block[2] */
+ const byte* b1; /*!< pointer to block[srv_sort_buf_size] */
+ byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
const mrec_t* mrec0; /*!< merge rec, points to block[0] or buf[0] */
- const mrec_t* mrec1; /*!< merge rec, points to block[1] or buf[1] */
+ const mrec_t* mrec1; /*!< merge rec, points to
+ block[srv_sort_buf_size] or buf[1] */
ulint* offsets0;/* offsets of mrec0 */
ulint* offsets1;/* offsets of mrec1 */
@@ -1661,7 +1801,7 @@ row_merge_blocks(
}
#endif /* UNIV_DEBUG */
- heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
+ heap = row_merge_heap_create(dup->index, &buf, &offsets0, &offsets1);
/* Write a record and read the next record. Split the output
file in two halves, which can be merged on the following pass. */
@@ -1677,10 +1817,13 @@ corrupt:
b1 = &block[srv_sort_buf_size];
b2 = &block[2 * srv_sort_buf_size];
- b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
- foffs0, &mrec0, offsets0);
- b1 = row_merge_read_rec(&block[srv_sort_buf_size], &buf[srv_sort_buf_size], b1, index, file->fd,
- foffs1, &mrec1, offsets1);
+ b0 = row_merge_read_rec(
+ &block[0], &buf[0], b0, dup->index,
+ file->fd, foffs0, &mrec0, offsets0);
+ b1 = row_merge_read_rec(
+ &block[srv_sort_buf_size],
+ &buf[srv_sort_buf_size], b1, dup->index,
+ file->fd, foffs1, &mrec1, offsets1);
if (UNIV_UNLIKELY(!b0 && mrec0)
|| UNIV_UNLIKELY(!b1 && mrec1)) {
@@ -1688,56 +1831,49 @@ corrupt:
}
while (mrec0 && mrec1) {
- ibool null_eq = FALSE;
- switch (row_merge_cmp(mrec0, mrec1,
- offsets0, offsets1, index,
- &null_eq)) {
+ switch (cmp_rec_rec_simple(
+ mrec0, mrec1, offsets0, offsets1,
+ dup->index, dup->table)) {
case 0:
- if (UNIV_UNLIKELY
- (dict_index_is_unique(index) && !null_eq)) {
- innobase_rec_to_mysql(table, mrec0,
- index, offsets0);
- mem_heap_free(heap);
- return(DB_DUPLICATE_KEY);
- }
- /* fall through */
+ mem_heap_free(heap);
+ return(DB_DUPLICATE_KEY);
case -1:
- ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
+ ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto merged);
break;
case 1:
- ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
+ ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto merged);
break;
default:
ut_error;
}
-
}
merged:
if (mrec0) {
/* append all mrec0 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto done0);
}
}
done0:
if (mrec1) {
/* append all mrec1 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
+ ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto done1);
}
}
done1:
mem_heap_free(heap);
- b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size], b2, of->fd, &of->offset);
+ b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size],
+ b2, of->fd, &of->offset);
return(b2 ? DB_SUCCESS : DB_CORRUPTION);
}
/*************************************************************//**
Copy a block of index entries.
@return TRUE on success, FALSE on failure */
-static __attribute__((nonnull))
+static __attribute__((nonnull, warn_unused_result))
ibool
row_merge_blocks_copy(
/*==================*/
@@ -1752,7 +1888,7 @@ row_merge_blocks_copy(
mrec_buf_t* buf; /*!< buffer for handling
split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
- byte* b2; /*!< pointer to block[2] */
+ byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
const mrec_t* mrec0; /*!< merge rec, points to block[0] */
ulint* offsets0;/* offsets of mrec0 */
ulint* offsets1;/* dummy offsets */
@@ -1782,8 +1918,8 @@ corrupt:
b2 = &block[2 * srv_sort_buf_size];
- b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
- foffs0, &mrec0, offsets0);
+ b0 = row_merge_read_rec(&block[0], &buf[0], b0, index,
+ file->fd, foffs0, &mrec0, offsets0);
if (UNIV_UNLIKELY(!b0 && mrec0)) {
goto corrupt;
@@ -1792,7 +1928,7 @@ corrupt:
if (mrec0) {
/* append all mrec0 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ ROW_MERGE_WRITE_GET_NEXT(0, index, goto done0);
}
}
done0:
@@ -1802,7 +1938,8 @@ done0:
(*foffs0)++;
mem_heap_free(heap);
- return(row_merge_write_eof(&block[2 * srv_sort_buf_size], b2, of->fd, &of->offset)
+ return(row_merge_write_eof(&block[2 * srv_sort_buf_size],
+ b2, of->fd, &of->offset)
!= NULL);
}
@@ -1810,18 +1947,16 @@ done0:
Merge disk files.
@return DB_SUCCESS or error code */
static __attribute__((nonnull))
-ulint
+dberr_t
row_merge(
/*======*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
ulint* num_run,/*!< in/out: Number of runs remain
to be merged */
ulint* run_offset) /*!< in/out: Array contains the
@@ -1830,7 +1965,7 @@ row_merge(
{
ulint foffs0; /*!< first input offset */
ulint foffs1; /*!< second input offset */
- ulint error; /*!< error code */
+ dberr_t error; /*!< error code */
merge_file_t of; /*!< output file */
const ulint ihalf = run_offset[*num_run / 2];
/*!< half the input file */
@@ -1861,15 +1996,15 @@ row_merge(
for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
- if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
+ if (trx_is_interrupted(trx)) {
return(DB_INTERRUPTED);
}
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- error = row_merge_blocks(index, file, block,
- &foffs0, &foffs1, &of, table);
+ error = row_merge_blocks(dup, file, block,
+ &foffs0, &foffs1, &of);
if (error != DB_SUCCESS) {
return(error);
@@ -1887,7 +2022,8 @@ row_merge(
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
+ if (!row_merge_blocks_copy(dup->index, file, block,
+ &foffs0, &of)) {
return(DB_CORRUPTION);
}
}
@@ -1895,14 +2031,15 @@ row_merge(
ut_ad(foffs0 == ihalf);
while (foffs1 < file->offset) {
- if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
+ if (trx_is_interrupted(trx)) {
return(DB_INTERRUPTED);
}
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
+ if (!row_merge_blocks_copy(dup->index, file, block,
+ &foffs1, &of)) {
return(DB_CORRUPTION);
}
}
@@ -1940,23 +2077,21 @@ row_merge(
Merge disk files.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_sort(
/*===========*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table) /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ int* tmpfd) /*!< in/out: temporary file handle */
{
- ulint half = file->offset / 2;
- ulint num_runs;
- ulint* run_offset;
- ulint error = DB_SUCCESS;
+ const ulint half = file->offset / 2;
+ ulint num_runs;
+ ulint* run_offset;
+ dberr_t error = DB_SUCCESS;
/* Record the number of merge runs we need to perform */
num_runs = file->offset;
@@ -1979,14 +2114,14 @@ row_merge_sort(
/* Merge the runs until we have one big run */
do {
- error = row_merge(trx, index, file, block, tmpfd,
- table, &num_runs, run_offset);
-
- UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
+ error = row_merge(trx, dup, file, block, tmpfd,
+ &num_runs, run_offset);
if (error != DB_SUCCESS) {
break;
}
+
+ UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
} while (num_runs > 1);
mem_free(run_offset);
@@ -1995,8 +2130,25 @@ row_merge_sort(
}
/*************************************************************//**
+Set blob fields empty */
+static __attribute__((nonnull))
+void
+row_merge_set_blob_empty(
+/*=====================*/
+ dtuple_t* tuple) /*!< in/out: data tuple */
+{
+ for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
+ dfield_t* field = dtuple_get_nth_field(tuple, i);
+
+ if (dfield_is_ext(field)) {
+ dfield_set_data(field, NULL, 0);
+ }
+ }
+}
+
+/*************************************************************//**
Copy externally stored columns to the data tuple. */
-static
+static __attribute__((nonnull))
void
row_merge_copy_blobs(
/*=================*/
@@ -2006,10 +2158,9 @@ row_merge_copy_blobs(
dtuple_t* tuple, /*!< in/out: data tuple */
mem_heap_t* heap) /*!< in/out: memory heap */
{
- ulint i;
- ulint n_fields = dtuple_get_n_fields(tuple);
+ ut_ad(rec_offs_any_extern(offsets));
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
ulint len;
const void* data;
dfield_t* field = dtuple_get_nth_field(tuple, i);
@@ -2020,11 +2171,12 @@ row_merge_copy_blobs(
ut_ad(!dfield_is_null(field));
- /* The table is locked during index creation.
- Therefore, externally stored columns cannot possibly
- be freed between the time the BLOB pointers are read
- (row_merge_read_clustered_index()) and dereferenced
- (below). */
+ /* During the creation of a PRIMARY KEY, the table is
+ X-locked, and we skip copying records that have been
+ marked for deletion. Therefore, externally stored
+ columns cannot possibly be freed between the time the
+ BLOB pointers are read (row_merge_read_clustered_index())
+ and dereferenced (below). */
data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap);
/* Because we have locked the table, any records
@@ -2041,54 +2193,38 @@ row_merge_copy_blobs(
Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_insert_index_tuples(
/*==========================*/
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction identifier */
dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- ulint zip_size,/*!< in: compressed page size of
- the old table, or 0 if uncompressed */
+ const dict_table_t* old_table,/*!< in: old table */
int fd, /*!< in: file descriptor */
row_merge_block_t* block) /*!< in/out: file buffer */
{
const byte* b;
- que_thr_t* thr;
- ins_node_t* node;
+ mem_heap_t* heap;
mem_heap_t* tuple_heap;
- mem_heap_t* graph_heap;
- ulint error = DB_SUCCESS;
+ mem_heap_t* ins_heap;
+ dberr_t error = DB_SUCCESS;
ulint foffs = 0;
ulint* offsets;
+ mrec_buf_t* buf;
- ut_ad(trx);
- ut_ad(index);
- ut_ad(table);
-
+ ut_ad(!srv_read_only_mode);
ut_ad(!(index->type & DICT_FTS));
-
- /* We use the insert query graph as the dummy graph
- needed in the row module call */
-
- trx->op_info = "inserting index entries";
-
- graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
- node = ins_node_create(INS_DIRECT, table, graph_heap);
-
- thr = pars_complete_graph_for_exec(node, trx, graph_heap);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
+ ut_ad(trx_id);
tuple_heap = mem_heap_create(1000);
{
ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
-
+ heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
+ ins_heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
offsets = static_cast<ulint*>(
- mem_heap_alloc(graph_heap, i * sizeof *offsets));
-
+ mem_heap_alloc(heap, i * sizeof *offsets));
offsets[0] = i;
offsets[1] = dict_index_get_n_fields(index);
}
@@ -2098,15 +2234,17 @@ row_merge_insert_index_tuples(
if (!row_merge_read(fd, foffs, block)) {
error = DB_CORRUPTION;
} else {
- mrec_buf_t* buf;
-
buf = static_cast<mrec_buf_t*>(
- mem_heap_alloc(graph_heap, sizeof *buf));
+ mem_heap_alloc(heap, sizeof *buf));
for (;;) {
const mrec_t* mrec;
dtuple_t* dtuple;
ulint n_ext;
+ big_rec_t* big_rec;
+ rec_t* rec;
+ btr_cur_t cursor;
+ mtr_t mtr;
b = row_merge_read_rec(block, buf, b, index,
fd, &foffs, &mrec, offsets);
@@ -2118,55 +2256,164 @@ row_merge_insert_index_tuples(
break;
}
+ dict_index_t* old_index
+ = dict_table_get_first_index(old_table);
+
+ if (dict_index_is_clust(index)
+ && dict_index_is_online_ddl(old_index)) {
+ error = row_log_table_get_error(old_index);
+ if (error != DB_SUCCESS) {
+ break;
+ }
+ }
+
dtuple = row_rec_to_index_entry_low(
mrec, index, offsets, &n_ext, tuple_heap);
- if (UNIV_UNLIKELY(n_ext)) {
- row_merge_copy_blobs(mrec, offsets, zip_size,
- dtuple, tuple_heap);
- }
+ if (!n_ext) {
+ /* There are no externally stored columns. */
+ } else if (!dict_index_is_online_ddl(old_index)) {
+ ut_ad(dict_index_is_clust(index));
+ /* Modifications to the table are
+ blocked while we are not rebuilding it
+ or creating indexes. Off-page columns
+ can be fetched safely. */
+ row_merge_copy_blobs(
+ mrec, offsets,
+ dict_table_zip_size(old_table),
+ dtuple, tuple_heap);
+ } else {
+ ut_ad(dict_index_is_clust(index));
- node->row = dtuple;
- node->table = table;
- node->trx_id = trx->id;
+ ulint offset = index->trx_id_offset;
- ut_ad(dtuple_validate(dtuple));
+ if (!offset) {
+ offset = row_get_trx_id_offset(
+ index, offsets);
+ }
- do {
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
+ /* Copy the off-page columns while
+ holding old_index->lock, so
+ that they cannot be freed by
+ a rollback of a fresh insert. */
+ rw_lock_s_lock(&old_index->lock);
+
+ if (row_log_table_is_rollback(
+ old_index,
+ trx_read_trx_id(mrec + offset))) {
+ /* The row and BLOB could
+ already be freed. They
+ will be deleted by
+ row_undo_ins_remove_clust_rec
+ when rolling back a fresh
+ insert. So, no need to retrieve
+ the off-page column. */
+ row_merge_set_blob_empty(
+ dtuple);
+ } else {
+ row_merge_copy_blobs(
+ mrec, offsets,
+ dict_table_zip_size(old_table),
+ dtuple, tuple_heap);
+ }
- error = row_ins_index_entry(index, dtuple,
- 0, FALSE, thr);
+ rw_lock_s_unlock(&old_index->lock);
+ }
- if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ ut_ad(dtuple_validate(dtuple));
+ log_free_check();
- goto next_rec;
- }
+ mtr_start(&mtr);
+ /* Insert after the last user record. */
+ btr_cur_open_at_index_side(
+ false, index, BTR_MODIFY_LEAF,
+ &cursor, 0, &mtr);
+ page_cur_position(
+ page_rec_get_prev(btr_cur_get_rec(&cursor)),
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_cur(&cursor));
+ cursor.flag = BTR_CUR_BINARY;
+#ifdef UNIV_DEBUG
+ /* Check that the records are inserted in order. */
+ rec = btr_cur_get_rec(&cursor);
+
+ if (!page_rec_is_infimum(rec)) {
+ ulint* rec_offsets = rec_get_offsets(
+ rec, index, offsets,
+ ULINT_UNDEFINED, &tuple_heap);
+ ut_ad(cmp_dtuple_rec(dtuple, rec, rec_offsets)
+ > 0);
+ }
+#endif /* UNIV_DEBUG */
+ ulint* ins_offsets = NULL;
+
+ error = btr_cur_optimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
+ &cursor, &ins_offsets, &ins_heap,
+ dtuple, &rec, &big_rec, 0, NULL, &mtr);
+
+ if (error == DB_FAIL) {
+ ut_ad(!big_rec);
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_open_at_index_side(
+ false, index, BTR_MODIFY_TREE,
+ &cursor, 0, &mtr);
+ page_cur_position(
+ page_rec_get_prev(btr_cur_get_rec(
+ &cursor)),
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_cur(&cursor));
+
+ error = btr_cur_pessimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
+ &cursor, &ins_offsets, &ins_heap,
+ dtuple, &rec, &big_rec, 0, NULL, &mtr);
+ }
+
+ if (!dict_index_is_clust(index)) {
+ page_update_max_trx_id(
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
- thr->lock_state = QUE_THR_LOCK_ROW;
+ mtr_commit(&mtr);
- trx->error_state = static_cast<enum db_err>(
- error);
+ if (UNIV_LIKELY_NULL(big_rec)) {
+ /* If the system crashes at this
+ point, the clustered index record will
+ contain a null BLOB pointer. This
+ should not matter, because the copied
+ table will be dropped on crash
+ recovery anyway. */
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(error == DB_SUCCESS);
+ error = row_ins_index_entry_big_rec(
+ dtuple, big_rec,
+ ins_offsets, &ins_heap,
+ index, NULL, __FILE__, __LINE__);
+ dtuple_convert_back_big_rec(
+ index, dtuple, big_rec);
+ }
- que_thr_stop_for_mysql(thr);
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- } while (row_mysql_handle_errors(&error, trx,
- thr, NULL));
+ if (error != DB_SUCCESS) {
+ goto err_exit;
+ }
- goto err_exit;
-next_rec:
mem_heap_empty(tuple_heap);
+ mem_heap_empty(ins_heap);
}
}
- que_thr_stop_for_mysql_no_error(thr, trx);
err_exit:
- que_graph_free(thr->graph);
-
- trx->op_info = "";
-
mem_heap_free(tuple_heap);
+ mem_heap_free(ins_heap);
+ mem_heap_free(heap);
return(error);
}
@@ -2175,7 +2422,7 @@ err_exit:
Sets an exclusive lock on a table, for the duration of creating indexes.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
@@ -2184,10 +2431,10 @@ row_merge_lock_table(
{
mem_heap_t* heap;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
sel_node_t* node;
- ut_ad(trx);
+ ut_ad(!srv_read_only_mode);
ut_ad(mode == LOCK_X || mode == LOCK_S);
heap = mem_heap_create(512);
@@ -2213,7 +2460,7 @@ run_again:
err = lock_table(0, table, mode, thr);
- trx->error_state =static_cast<enum db_err>( err);
+ trx->error_state = err;
if (UNIV_LIKELY(err == DB_SUCCESS)) {
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -2221,7 +2468,7 @@ run_again:
que_thr_stop_for_mysql(thr);
if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
+ bool was_lock_wait;
was_lock_wait = row_mysql_handle_errors(
&err, trx, thr, NULL);
@@ -2255,105 +2502,312 @@ run_again:
}
/*********************************************************************//**
-Drop an index from the InnoDB system tables. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed. */
-UNIV_INTERN
+Drop an index that was created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+static
void
-row_merge_drop_index(
-/*=================*/
- dict_index_t* index, /*!< in: index to be removed */
- dict_table_t* table, /*!< in: table */
- trx_t* trx) /*!< in: transaction handle */
+row_merge_drop_index_dict(
+/*======================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ index_id_t index_id)/*!< in: index identifier */
{
- db_err err;
- pars_info_t* info = pars_info_create();
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
-
static const char sql[] =
"PROCEDURE DROP_INDEX_PROC () IS\n"
"BEGIN\n"
- /* Rename the index, so that it will be dropped by
- row_merge_drop_temp_indexes() at crash recovery
- if the server crashes before this trx is committed. */
- "UPDATE SYS_INDEXES SET NAME=CONCAT('"
- TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n"
- "COMMIT WORK;\n"
- /* Drop the field definitions of the index. */
- "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
- /* Drop the index definition and the B-tree. */
- "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
+ "DELETE FROM SYS_FIELDS WHERE INDEX_ID=:indexid;\n"
+ "DELETE FROM SYS_INDEXES WHERE ID=:indexid;\n"
"END;\n";
+ dberr_t error;
+ pars_info_t* info;
- ut_ad(index && table && trx);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
- pars_info_add_ull_literal(info, "indexid", index->id);
+ info = pars_info_create();
+ pars_info_add_ull_literal(info, "indexid", index_id);
+ trx->op_info = "dropping index from dictionary";
+ error = que_eval_sql(info, sql, FALSE, trx);
- trx_start_if_not_started_xa(trx);
- trx->op_info = "dropping index";
+ if (error != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_index_dict "
+ "failed with error code: %u.\n", (unsigned) error);
+ }
- err = static_cast<db_err>(que_eval_sql(info, sql, FALSE, trx));
+ trx->op_info = "";
+}
- DBUG_EXECUTE_IF(
- "ib_drop_index_too_many_concurrent_trxs",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = err;);
+/*********************************************************************//**
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes_dict(
+/*========================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ table_id_t table_id)/*!< in: table identifier */
+{
+ static const char sql[] =
+ "PROCEDURE DROP_INDEXES_PROC () IS\n"
+ "ixid CHAR;\n"
+ "found INT;\n"
- if (err == DB_SUCCESS) {
+ "DECLARE CURSOR index_cur IS\n"
+ " SELECT ID FROM SYS_INDEXES\n"
+ " WHERE TABLE_ID=:tableid AND\n"
+ " SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
+ "FOR UPDATE;\n"
- /* If it is FTS index, drop from table->fts and also drop
- its auxiliary tables */
- if (index->type & DICT_FTS) {
- ut_a(table->fts);
- fts_drop_index(table, index, trx);
- }
+ "BEGIN\n"
+ "found := 1;\n"
+ "OPEN index_cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_cur INTO ixid;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
+ " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
+ " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_cur;\n"
- /* Replace this index with another equivalent index for all
- foreign key constraints on this table where this index is
- used */
+ "END;\n";
+ dberr_t error;
+ pars_info_t* info;
- dict_table_replace_index_in_foreign_list(table, index, trx);
- dict_index_remove_from_cache(table, index);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
- } else {
+ /* It is possible that table->n_ref_count > 1 when
+ locked=TRUE. In this case, all code that should have an open
+ handle to the table be waiting for the next statement to execute,
+ or waiting for a meta-data lock.
+
+ A concurrent purge will be prevented by dict_operation_lock. */
+
+ info = pars_info_create();
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ trx->op_info = "dropping indexes";
+ error = que_eval_sql(info, sql, FALSE, trx);
+
+ if (error != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_TRANSACTIONS. */
+ DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_index failed "
- "with error code: %lu.\n", (ulint) err);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_indexes_dict "
+ "failed with error code: %u.\n", (unsigned) error);
}
trx->op_info = "";
}
/*********************************************************************//**
-Drop those indexes which were created before an error occurred when
-building an index. The data dictionary must have been locked
-exclusively by the caller, because the transaction will not be
-committed. */
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table containing the indexes */
- dict_index_t** index, /*!< in: indexes to drop */
- ulint num_created) /*!< in: number of elements in index[] */
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ dict_table_t* table, /*!< in/out: table containing the indexes */
+ ibool locked) /*!< in: TRUE=table locked,
+ FALSE=may need to do a lazy drop */
{
- ulint key_num;
+ dict_index_t* index;
+ dict_index_t* next_index;
+
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ index = dict_table_get_first_index(table);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE);
+
+ /* the caller should have an open handle to the table */
+ ut_ad(table->n_ref_count >= 1);
+
+ /* It is possible that table->n_ref_count > 1 when
+ locked=TRUE. In this case, all code that should have an open
+ handle to the table be waiting for the next statement to execute,
+ or waiting for a meta-data lock.
+
+ A concurrent purge will be prevented by dict_operation_lock. */
+
+ if (!locked && table->n_ref_count > 1) {
+ /* We will have to drop the indexes later, when the
+ table is guaranteed to be no longer in use. Mark the
+ indexes as incomplete and corrupted, so that other
+ threads will stop using them. Let dict_table_close()
+ or crash recovery or the next invocation of
+ prepare_inplace_alter_table() take care of dropping
+ the indexes. */
+
+ while ((index = dict_table_get_next_index(index)) != NULL) {
+ ut_ad(!dict_index_is_clust(index));
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ continue;
+ case ONLINE_INDEX_COMPLETE:
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ /* Do nothing to already
+ published indexes. */
+ } else if (index->type & DICT_FTS) {
+ /* Drop a completed FULLTEXT
+ index, due to a timeout during
+ MDL upgrade for
+ commit_inplace_alter_table().
+ Because only concurrent reads
+ are allowed (and they are not
+ seeing this index yet) we
+ are safe to drop the index. */
+ dict_index_t* prev = UT_LIST_GET_PREV(
+ indexes, index);
+ /* At least there should be
+ the clustered index before
+ this one. */
+ ut_ad(prev);
+ ut_a(table->fts);
+ fts_drop_index(table, index, trx);
+ /* Since
+ INNOBASE_SHARE::idx_trans_tbl
+ is shared between all open
+ ha_innobase handles to this
+ table, no thread should be
+ accessing this dict_index_t
+ object. Also, we should be
+ holding LOCK=SHARED MDL on the
+ table even after the MDL
+ upgrade timeout. */
+
+ /* We can remove a DICT_FTS
+ index from the cache, because
+ we do not allow ADD FULLTEXT INDEX
+ with LOCK=NONE. If we allowed that,
+ we should exclude FTS entries from
+ prebuilt->ins_node->entry_list
+ in ins_node_create_entry_list(). */
+ dict_index_remove_from_cache(
+ table, index);
+ index = prev;
+ } else {
+ rw_lock_x_lock(
+ dict_index_get_lock(index));
+ dict_index_set_online_status(
+ index, ONLINE_INDEX_ABORTED);
+ index->type |= DICT_CORRUPT;
+ table->drop_aborted = TRUE;
+ goto drop_aborted;
+ }
+ continue;
+ case ONLINE_INDEX_CREATION:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ row_log_abort_sec(index);
+ drop_aborted:
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ DEBUG_SYNC_C("merge_drop_index_after_abort");
+ /* covered by dict_sys->mutex */
+ MONITOR_INC(MONITOR_BACKGROUND_DROP_INDEX);
+ /* fall through */
+ case ONLINE_INDEX_ABORTED:
+ /* Drop the index tree from the
+ data dictionary and free it from
+ the tablespace, but keep the object
+ in the data dictionary cache. */
+ row_merge_drop_index_dict(trx, index->id);
+ rw_lock_x_lock(dict_index_get_lock(index));
+ dict_index_set_online_status(
+ index, ONLINE_INDEX_ABORTED_DROPPED);
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ table->drop_aborted = TRUE;
+ continue;
+ }
+ ut_error;
+ }
- for (key_num = 0; key_num < num_created; key_num++) {
- row_merge_drop_index(index[key_num], table, trx);
+ return;
}
+
+ row_merge_drop_indexes_dict(trx, table->id);
+
+ /* Invalidate all row_prebuilt_t::ins_graph that are referring
+ to this table. That is, force row_get_prebuilt_insert_row() to
+ rebuild prebuilt->ins_node->entry_list). */
+ ut_ad(table->def_trx_id <= trx->id);
+ table->def_trx_id = trx->id;
+
+ next_index = dict_table_get_next_index(index);
+
+ while ((index = next_index) != NULL) {
+ /* read the next pointer before freeing the index */
+ next_index = dict_table_get_next_index(index);
+
+ ut_ad(!dict_index_is_clust(index));
+
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* If it is FTS index, drop from table->fts
+ and also drop its auxiliary tables */
+ if (index->type & DICT_FTS) {
+ ut_a(table->fts);
+ fts_drop_index(table, index, trx);
+ }
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ /* This state should only be possible
+ when prepare_inplace_alter_table() fails
+ after invoking row_merge_create_index().
+ In inplace_alter_table(),
+ row_merge_build_indexes()
+ should never leave the index in this state.
+ It would invoke row_log_abort_sec() on
+ failure. */
+ case ONLINE_INDEX_COMPLETE:
+ /* In these cases, we are able to drop
+ the index straight. The DROP INDEX was
+ never deferred. */
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* covered by dict_sys->mutex */
+ MONITOR_DEC(MONITOR_BACKGROUND_DROP_INDEX);
+ }
+
+ dict_index_remove_from_cache(table, index);
+ }
+ }
+
+ table->drop_aborted = FALSE;
+ ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE));
}
/*********************************************************************//**
@@ -2363,9 +2817,32 @@ void
row_merge_drop_temp_indexes(void)
/*=============================*/
{
- trx_t* trx;
- btr_pcur_t pcur;
- mtr_t mtr;
+ static const char sql[] =
+ "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
+ "ixid CHAR;\n"
+ "found INT;\n"
+
+ "DECLARE CURSOR index_cur IS\n"
+ " SELECT ID FROM SYS_INDEXES\n"
+ " WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
+ "FOR UPDATE;\n"
+
+ "BEGIN\n"
+ "found := 1;\n"
+ "OPEN index_cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_cur INTO ixid;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
+ " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
+ " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_cur;\n"
+ "END;\n";
+ trx_t* trx;
+ dberr_t error;
/* Load the table definitions that contain partially defined
indexes, so that the data dictionary information can be checked
@@ -2373,75 +2850,26 @@ row_merge_drop_temp_indexes(void)
trx = trx_allocate_for_background();
trx->op_info = "dropping partially created indexes";
row_mysql_lock_data_dictionary(trx);
+ /* Ensure that this transaction will be rolled back and locks
+ will be released, if the server gets killed before the commit
+ gets written to the redo log. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- mtr_start(&mtr);
-
- btr_pcur_open_at_index_side(
- TRUE,
- dict_table_get_first_index(dict_sys->sys_indexes),
- BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- for (;;) {
- const rec_t* rec;
- const byte* field;
- ulint len;
- table_id_t table_id;
- dict_table_t* table;
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__NAME, &len);
- if (len == UNIV_SQL_NULL || len == 0
- || (char) *field != TEMP_INDEX_PREFIX) {
- continue;
- }
-
- /* This is a temporary index. */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
- if (len != 8) {
- /* Corrupted TABLE_ID */
- continue;
- }
-
- table_id = mach_read_from_8(field);
-
- btr_pcur_store_position(&pcur, &mtr);
- btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- table = dict_table_open_on_id(table_id, TRUE);
+ trx->op_info = "dropping indexes";
+ error = que_eval_sql(NULL, sql, FALSE, trx);
- if (table) {
- dict_index_t* index;
- dict_index_t* next_index;
-
- for (index = dict_table_get_first_index(table);
- index; index = next_index) {
-
- next_index = dict_table_get_next_index(index);
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- row_merge_drop_index(index, table, trx);
- trx_commit_for_mysql(trx);
- }
- }
-
- dict_table_close(table, TRUE);
- }
+ if (error != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_temp_indexes "
+ "failed with error code: %u.\n", (unsigned) error);
}
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
trx_free_for_background(trx);
}
@@ -2449,8 +2877,8 @@ row_merge_drop_temp_indexes(void)
/*********************************************************************//**
Creates temporary merge files, and if UNIV_PFS_IO defined, register
the file descriptor with Performance Schema.
-@return File descriptor */
-UNIV_INLINE
+@return file descriptor, or -1 on failure */
+UNIV_INTERN
int
row_merge_file_create_low(void)
/*===========================*/
@@ -2469,31 +2897,43 @@ row_merge_file_create_low(void)
#endif
fd = innobase_mysql_tmpfile();
#ifdef UNIV_PFS_IO
- register_pfs_file_open_end(locker, fd);
+ register_pfs_file_open_end(locker, fd);
#endif
+
+ if (fd < 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create temporary merge file");
+ return -1;
+ }
return(fd);
}
/*********************************************************************//**
-Create a merge file. */
+Create a merge file.
+@return file descriptor, or -1 on failure */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
merge_file_t* merge_file) /*!< out: merge file structure */
{
merge_file->fd = row_merge_file_create_low();
- if (srv_disable_sort_file_cache) {
- os_file_set_nocache(merge_file->fd, "row0merge.c", "sort");
- }
merge_file->offset = 0;
merge_file->n_rec = 0;
+
+ if (merge_file->fd >= 0) {
+ if (srv_disable_sort_file_cache) {
+ os_file_set_nocache(merge_file->fd,
+ "row0merge.cc", "sort");
+ }
+ }
+ return(merge_file->fd);
}
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
-UNIV_INLINE
+UNIV_INTERN
void
row_merge_file_destroy_low(
/*=======================*/
@@ -2506,7 +2946,9 @@ row_merge_file_destroy_low(
fd, 0, PSI_FILE_CLOSE,
__FILE__, __LINE__);
#endif
- close(fd);
+ if (fd >= 0) {
+ close(fd);
+ }
#ifdef UNIV_PFS_IO
register_pfs_file_io_end(locker, 0);
#endif
@@ -2517,8 +2959,10 @@ UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
- merge_file_t* merge_file) /*!< out: merge file structure */
+ merge_file_t* merge_file) /*!< in/out: merge file structure */
{
+ ut_ad(!srv_read_only_mode);
+
if (merge_file->fd != -1) {
row_merge_file_destroy_low(merge_file->fd);
merge_file->fd = -1;
@@ -2526,173 +2970,109 @@ row_merge_file_destroy(
}
/*********************************************************************//**
-Determine the precise type of a column that is added to a tem
-if a column must be constrained NOT NULL.
-@return col->prtype, possibly ORed with DATA_NOT_NULL */
-UNIV_INLINE
-ulint
-row_merge_col_prtype(
-/*=================*/
- const dict_col_t* col, /*!< in: column */
- const char* col_name, /*!< in: name of the column */
- const merge_index_def_t*index_def) /*!< in: the index definition
- of the primary key */
-{
- ulint prtype = col->prtype;
- ulint i;
-
- ut_ad(index_def->ind_type & DICT_CLUSTERED);
-
- if (prtype & DATA_NOT_NULL) {
-
- return(prtype);
- }
-
- /* All columns that are included
- in the PRIMARY KEY must be NOT NULL. */
-
- for (i = 0; i < index_def->n_fields; i++) {
- if (!strcmp(col_name, index_def->fields[i].field_name)) {
- return(prtype | DATA_NOT_NULL);
- }
- }
-
- return(prtype);
-}
-
-/*********************************************************************//**
-Create a temporary table for creating a primary key, using the definition
-of an existing table.
-@return table, or NULL on error */
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-dict_table_t*
-row_merge_create_temporary_table(
-/*=============================*/
- const char* table_name, /*!< in: new table name */
- const merge_index_def_t*index_def, /*!< in: the index definition
- of the primary key */
- const dict_table_t* table, /*!< in: old table definition */
- trx_t* trx) /*!< in/out: transaction
- (sets error_state) */
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
{
- ulint i;
- dict_table_t* new_table = NULL;
- ulint n_cols = dict_table_get_n_user_cols(table);
- ulint error;
- mem_heap_t* heap = mem_heap_create(1000);
- ulint num_col;
-
- ut_ad(table_name);
- ut_ad(index_def);
- ut_ad(table);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- num_col = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)
- ? n_cols + 1
- : n_cols;
-
- new_table = dict_mem_table_create(
- table_name, 0, num_col, table->flags, table->flags2);
-
- for (i = 0; i < n_cols; i++) {
- const dict_col_t* col;
- const char* col_name;
+ dberr_t err = DB_SUCCESS;
+ pars_info_t* info = pars_info_create();
- col = dict_table_get_nth_col(table, i);
- col_name = dict_table_get_col_name(table, i);
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in renaming indexes. */
- dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
- row_merge_col_prtype(col, col_name,
- index_def),
- col->len);
- }
+ static const char rename_index[] =
+ "PROCEDURE RENAME_INDEX_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
+ "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
+ "END;\n";
- /* Add the FTS doc_id hidden column */
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
- fts_add_doc_id_column(new_table);
- new_table->fts->doc_col = n_cols;
- }
+ ut_ad(trx);
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- error = row_create_table_for_mysql(new_table, trx);
- mem_heap_free(heap);
+ trx->op_info = "renaming index to add";
- if (error != DB_SUCCESS) {
- trx->error_state = static_cast<enum db_err>(error);
- new_table = NULL;
- } else {
- dict_table_t* temp_table;
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ pars_info_add_ull_literal(info, "indexid", index_id);
- /* We need to bump up the table ref count and before we can
- use it we need to open the table. */
+ err = que_eval_sql(info, rename_index, FALSE, trx);
- temp_table = dict_table_open_on_name_no_stats(
- new_table->name, TRUE, DICT_ERR_IGNORE_NONE);
+ if (err != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- ut_a(new_table == temp_table);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: row_merge_rename_index_to_add "
+ "failed with error code: %u.\n", (unsigned) err);
}
- return(new_table);
+ trx->op_info = "";
+
+ return(err);
}
/*********************************************************************//**
-Rename the temporary indexes in the dictionary to permanent ones. The
-data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed.
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
-row_merge_rename_indexes(
-/*=====================*/
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table) /*!< in/out: table with new indexes */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
{
- db_err err = DB_SUCCESS;
+ dberr_t err;
pars_info_t* info = pars_info_create();
+ ut_ad(!srv_read_only_mode);
+
/* We use the private SQL parser of Innobase to generate the
query graphs needed in renaming indexes. */
- static const char* sql =
- "PROCEDURE RENAME_INDEXES_PROC () IS\n"
+ static const char rename_index[] =
+ "PROCEDURE RENAME_INDEX_PROC () IS\n"
"BEGIN\n"
- "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
- "WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='"
- TEMP_INDEX_PREFIX_STR "';\n"
+ "UPDATE SYS_INDEXES SET NAME=CONCAT('"
+ TEMP_INDEX_PREFIX_STR "',NAME)\n"
+ "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
"END;\n";
- ut_ad(table);
ut_ad(trx);
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- trx->op_info = "renaming indexes";
+ trx->op_info = "renaming index to drop";
- pars_info_add_ull_literal(info, "tableid", table->id);
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ pars_info_add_ull_literal(info, "indexid", index_id);
- err = static_cast<db_err>(que_eval_sql(info, sql, FALSE, trx));
+ err = que_eval_sql(info, rename_index, FALSE, trx);
- DBUG_EXECUTE_IF(
- "ib_rename_indexes_too_many_concurrent_trxs",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = static_cast<db_err>(err););
-
- if (err == DB_SUCCESS) {
- dict_index_t* index = dict_table_get_first_index(table);
- do {
- if (*index->name == TEMP_INDEX_PREFIX) {
- index->name++;
- }
- index = dict_table_get_next_index(index);
- } while (index);
- } else {
+ if (err != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_TRANSACTIONS. */
-
+ DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_rename_indexes "
- "failed with error code: %lu.\n", (ulint) err);
+ fprintf(stderr,
+ " InnoDB: Error: row_merge_rename_index_to_drop "
+ "failed with error code: %u.\n", (unsigned) err);
}
trx->op_info = "";
@@ -2701,12 +3081,39 @@ row_merge_rename_indexes(
}
/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace. The caller is responsible for freeing the
+memory allocated for the return value.
+@return new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+ dict_table_t* table, /*!< in: table to be renamed */
+ const char* new_name) /*!< in: new name */
+{
+ char* new_path;
+ char* old_path;
+
+ ut_ad(table->space != TRX_SYS_SPACE);
+
+ old_path = fil_space_get_first_path(table->space);
+ ut_a(old_path);
+
+ new_path = os_file_make_new_pathname(old_path, new_name);
+
+ mem_free(old_path);
+
+ return(new_path);
+}
+
+/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_rename_tables(
/*====================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
@@ -2716,28 +3123,32 @@ row_merge_rename_tables(
const char* tmp_name, /*!< in: new name for old_table */
trx_t* trx) /*!< in: transaction handle */
{
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
pars_info_t* info;
char old_name[MAX_FULL_NAME_LEN + 1];
+ ut_ad(!srv_read_only_mode);
ut_ad(old_table != new_table);
ut_ad(mutex_own(&dict_sys->mutex));
-
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
/* store the old/current name to an automatic variable */
if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: too long table name: '%s', "
- "max length is %d\n", old_table->name,
- MAX_FULL_NAME_LEN);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Too long table name: '%s', max length is %d",
+ old_table->name, MAX_FULL_NAME_LEN);
ut_error;
}
trx->op_info = "renaming tables";
+ DBUG_EXECUTE_IF(
+ "ib_rebuild_cannot_rename",
+ err = DB_ERROR; goto err_exit;);
+
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data in system tables. */
@@ -2756,21 +3167,124 @@ row_merge_rename_tables(
" WHERE NAME = :new_name;\n"
"END;\n", FALSE, trx);
- if (err != DB_SUCCESS) {
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the old
+ table is in a non-system tablespace where space > 0. */
+ if (err == DB_SUCCESS
+ && old_table->space != TRX_SYS_SPACE
+ && !old_table->ibd_file_missing) {
+ /* Make pathname to update SYS_DATAFILES. */
+ char* tmp_path = row_make_new_pathname(old_table, tmp_name);
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "tmp_name", tmp_name);
+ pars_info_add_str_literal(info, "tmp_path", tmp_path);
+ pars_info_add_int4_literal(info, "old_space",
+ (lint) old_table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_OLD_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :tmp_name\n"
+ " WHERE SPACE = :old_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :tmp_path\n"
+ " WHERE SPACE = :old_space;\n"
+ "END;\n", FALSE, trx);
+
+ mem_free(tmp_path);
+ }
+
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the new
+ table is in a non-system tablespace where space > 0. */
+ if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
+ /* Make pathname to update SYS_DATAFILES. */
+ char* old_path = row_make_new_pathname(new_table, old_name);
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_path", old_path);
+ pars_info_add_int4_literal(info, "new_space",
+ (lint) new_table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_NEW_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :old_name\n"
+ " WHERE SPACE = :new_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :old_path\n"
+ " WHERE SPACE = :new_space;\n"
+ "END;\n", FALSE, trx);
+
+ mem_free(old_path);
+ }
+ if (err != DB_SUCCESS) {
goto err_exit;
}
+ /* Generate the redo logs for file operations */
+ fil_mtr_rename_log(old_table->space, old_name,
+ new_table->space, new_table->name, tmp_name);
+
+ /* What if the redo logs are flushed to disk here? This is
+ tested with following crash point */
+ DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ /* File operations cannot be rolled back. So, before proceeding
+ with file operations, commit the dictionary changes.*/
+ trx_commit_for_mysql(trx);
+
+ /* If server crashes here, the dictionary in InnoDB and MySQL
+ will differ. The .ibd files and the .frm files must be swapped
+ manually by the administrator. No loss of data. */
+ DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
+
+ /* Ensure that the redo logs are flushed to disk. The config
+ innodb_flush_log_at_trx_commit must not affect this. */
+ log_buffer_flush_to_disk();
+
/* The following calls will also rename the .ibd data files if
the tables are stored in a single-table tablespace */
- if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
- || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
+ err = dict_table_rename_in_cache(old_table, tmp_name, FALSE);
- err = DB_ERROR;
- goto err_exit;
+ if (err == DB_SUCCESS) {
+
+ ut_ad(dict_table_is_discarded(old_table)
+ == dict_table_is_discarded(new_table));
+
+ err = dict_table_rename_in_cache(new_table, old_name, FALSE);
+
+ if (err != DB_SUCCESS) {
+
+ if (dict_table_rename_in_cache(
+ old_table, old_name, FALSE)
+ != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot undo the rename in cache "
+ "from %s to %s", old_name, tmp_name);
+ }
+
+ goto err_exit;
+ }
+
+ if (dict_table_is_discarded(new_table)) {
+
+ err = row_import_update_discarded_flag(
+ trx, new_table->id, true, true);
+ }
}
+ DBUG_EXECUTE_IF("ib_rebuild_cannot_load_fk",
+ err = DB_ERROR; goto err_exit;);
+
err = dict_load_foreigns(old_name, FALSE, TRUE);
if (err != DB_SUCCESS) {
@@ -2788,8 +3302,8 @@ err_exit:
/*********************************************************************//**
Create and execute a query graph for creating an index.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_create_index_graph(
/*=========================*/
trx_t* trx, /*!< in: trx */
@@ -2799,7 +3313,7 @@ row_merge_create_index_graph(
ind_node_t* node; /*!< Index creation node */
mem_heap_t* heap; /*!< Memory heap */
que_thr_t* thr; /*!< Query thread */
- ulint err;
+ dberr_t err;
ut_ad(trx);
ut_ad(table);
@@ -2808,7 +3322,7 @@ row_merge_create_index_graph(
heap = mem_heap_create(512);
index->table = table;
- node = ind_create_graph_create(index, heap);
+ node = ind_create_graph_create(index, heap, false);
thr = pars_complete_graph_for_exec(node, trx, heap);
ut_a(thr == que_fork_start_command(
@@ -2832,14 +3346,16 @@ row_merge_create_index(
/*===================*/
trx_t* trx, /*!< in/out: trx (sets error_state) */
dict_table_t* table, /*!< in: the index is on this table */
- const merge_index_def_t*index_def)
+ const index_def_t* index_def)
/*!< in: the index definition */
{
dict_index_t* index;
- ulint err;
+ dberr_t err;
ulint n_fields = index_def->n_fields;
ulint i;
+ ut_ad(!srv_read_only_mode);
+
/* Create the index prototype, using the passed in def, this is not
a persistent operation. We pass 0 as the space id, and determine at
a lower level the space id where to store the table. */
@@ -2850,10 +3366,11 @@ row_merge_create_index(
ut_a(index);
for (i = 0; i < n_fields; i++) {
- merge_index_field_t* ifield = &index_def->fields[i];
+ index_field_t* ifield = &index_def->fields[i];
- dict_mem_index_add_field(index, ifield->field_name,
- ifield->prefix_len);
+ dict_mem_index_add_field(
+ index, dict_table_get_col_name(table, ifield->col_no),
+ ifield->prefix_len);
}
/* Add the index to SYS_INDEXES, using the index prototype. */
@@ -2861,15 +3378,14 @@ row_merge_create_index(
if (err == DB_SUCCESS) {
- index = row_merge_dict_table_get_index(
- table, index_def);
+ index = dict_table_get_index_on_name(table, index_def->name);
ut_a(index);
/* Note the id of the transaction that created this
index, we use it to restrict readers from accessing
this index, to ensure read consistency. */
- index->trx_id = trx->id;
+ ut_ad(index->trx_id == trx->id);
} else {
index = NULL;
}
@@ -2886,35 +3402,46 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to check */
{
+ if (!dict_index_is_clust(index)
+ && dict_index_is_online_ddl(index)) {
+ /* Indexes that are being created are not useable. */
+ return(FALSE);
+ }
+
return(!dict_index_is_corrupted(index)
- && (!trx->read_view
- || read_view_sees_trx_id(trx->read_view, index->trx_id)));
+ && (dict_table_is_temporary(index->table)
+ || !trx->read_view
+ || read_view_sees_trx_id(trx->read_view, index->trx_id)));
}
/*********************************************************************//**
-Drop the old table.
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
dict_table_t* table) /*!< in: table to drop */
{
+ ut_ad(!srv_read_only_mode);
+
/* There must be no open transactions on the table. */
ut_a(table->n_ref_count == 0);
- return(row_drop_table_for_mysql(table->name, trx, FALSE));
+ return(row_drop_table_for_mysql(table->name, trx, false, false));
}
-
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_build_indexes(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -2923,45 +3450,62 @@ row_merge_build_indexes(
dict_table_t* new_table, /*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** indexes, /*!< in: indexes to be created */
+ const ulint* key_numbers, /*!< in: MySQL key numbers */
ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table) /*!< in/out: MySQL table, for
+ struct TABLE* table, /*!< in/out: MySQL table, for
reporting erroneous key value
if applicable */
+ const dtuple_t* add_cols, /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map, /*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc, /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence) /*!< in: autoinc instance if
+ add_autoinc != ULINT_UNDEFINED */
{
merge_file_t* merge_files;
row_merge_block_t* block;
ulint block_size;
ulint i;
ulint j;
- ulint error;
+ dberr_t error;
int tmpfd;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ib_int64_t sig_count = 0;
- ut_ad(trx);
- ut_ad(old_table);
- ut_ad(new_table);
- ut_ad(indexes);
- ut_ad(n_indexes);
-
- trx_start_if_not_started_xa(trx);
+ ut_ad(!srv_read_only_mode);
+ ut_ad((old_table == new_table) == !col_map);
+ ut_ad(!add_cols || col_map);
/* Allocate memory for merge file data structure and initialize
fields */
- merge_files = static_cast<merge_file_t*>(
- mem_alloc(n_indexes * sizeof *merge_files));
-
block_size = 3 * srv_sort_buf_size;
block = static_cast<row_merge_block_t*>(
os_mem_alloc_large(&block_size));
- for (i = 0; i < n_indexes; i++) {
+ if (block == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ trx_start_if_not_started_xa(trx);
- row_merge_file_create(&merge_files[i]);
+ merge_files = static_cast<merge_file_t*>(
+ mem_alloc(n_indexes * sizeof *merge_files));
+
+ for (i = 0; i < n_indexes; i++) {
+ if (row_merge_file_create(&merge_files[i]) < 0) {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
if (indexes[i]->type & DICT_FTS) {
ibool opt_doc_id_size = FALSE;
@@ -2971,17 +3515,28 @@ row_merge_build_indexes(
we need to build a "fts sort index" indexing
on above three 'fields' */
fts_sort_idx = row_merge_create_fts_sort_index(
- indexes[i], old_table,
- &opt_doc_id_size);
-
- row_fts_psort_info_init(trx, table, new_table,
- fts_sort_idx, opt_doc_id_size,
- &psort_info, &merge_info);
+ indexes[i], old_table, &opt_doc_id_size);
+
+ row_merge_dup_t* dup = static_cast<row_merge_dup_t*>(
+ ut_malloc(sizeof *dup));
+ dup->index = fts_sort_idx;
+ dup->table = table;
+ dup->col_map = col_map;
+ dup->n_dup = 0;
+
+ row_fts_psort_info_init(
+ trx, dup, new_table, opt_doc_id_size,
+ &psort_info, &merge_info);
}
}
tmpfd = row_merge_file_create_low();
+ if (tmpfd < 0) {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
+
/* Reset the MySQL row buffer that is used when reporting
duplicate keys. */
innobase_rec_reset(table);
@@ -2990,31 +3545,61 @@ row_merge_build_indexes(
secondary index entries for merge sort */
error = row_merge_read_clustered_index(
- trx, table, old_table, new_table, indexes,
- fts_sort_idx, psort_info, merge_files, n_indexes, block);
+ trx, table, old_table, new_table, online, indexes,
+ fts_sort_idx, psort_info, merge_files, key_numbers,
+ n_indexes, add_cols, col_map,
+ add_autoinc, sequence, block);
if (error != DB_SUCCESS) {
goto func_exit;
}
+ DEBUG_SYNC_C("row_merge_after_scan");
+
/* Now we have files containing index entries ready for
sorting and inserting. */
for (i = 0; i < n_indexes; i++) {
- dict_index_t* sort_idx;
-
- sort_idx = (indexes[i]->type & DICT_FTS)
- ? fts_sort_idx
- : indexes[i];
+ dict_index_t* sort_idx = indexes[i];
if (indexes[i]->type & DICT_FTS) {
os_event_t fts_parallel_merge_event;
+ bool all_exit = false;
+ ulint trial_count = 0;
+
+ sort_idx = fts_sort_idx;
+
+ /* Now all children should complete, wait
+ a bit until they all finish using event */
+ while (!all_exit && trial_count < 10000) {
+ all_exit = true;
+
+ for (j = 0; j < fts_sort_pll_degree;
+ j++) {
+ if (psort_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ }
+
+ if (!all_exit) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Not all child sort threads exited"
+ " when creating FTS index '%s'",
+ indexes[i]->name);
+ }
fts_parallel_merge_event
- = merge_info[0].psort_common->sort_event;
+ = merge_info[0].psort_common->merge_event;
if (FTS_PLL_MERGE) {
+ trial_count = 0;
+ all_exit = false;
os_event_reset(fts_parallel_merge_event);
row_fts_start_parallel_merge(merge_info);
wait_again:
@@ -3024,33 +3609,64 @@ wait_again:
for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
if (merge_info[j].child_status
- != FTS_CHILD_COMPLETE) {
+ != FTS_CHILD_COMPLETE
+ && merge_info[j].child_status
+ != FTS_CHILD_EXITING) {
sig_count = os_event_reset(
fts_parallel_merge_event);
goto wait_again;
}
}
+
+ /* Now all children should complete, wait
+ a bit until they all finish using event */
+ while (!all_exit && trial_count < 10000) {
+ all_exit = true;
+
+ for (j = 0; j < FTS_NUM_AUX_INDEX;
+ j++) {
+ if (merge_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ }
+
+ if (!all_exit) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Not all child merge threads"
+ " exited when creating FTS"
+ " index '%s'",
+ indexes[i]->name);
+ }
} else {
+ /* This cannot report duplicates; an
+ assertion would fail in that case. */
error = row_fts_merge_insert(
sort_idx, new_table,
psort_info, 0);
}
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
+#endif
} else {
- error = row_merge_sort(trx, sort_idx, &merge_files[i],
- block, &tmpfd, table);
+ row_merge_dup_t dup = {
+ sort_idx, table, col_map, 0};
+
+ error = row_merge_sort(
+ trx, &dup, &merge_files[i],
+ block, &tmpfd);
if (error == DB_SUCCESS) {
error = row_merge_insert_index_tuples(
- trx, sort_idx, new_table,
- dict_table_zip_size(old_table),
+ trx->id, sort_idx, old_table,
merge_files[i].fd, block);
}
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
-#endif
}
/* Close the temporary file to free up space. */
@@ -3058,10 +3674,20 @@ wait_again:
if (indexes[i]->type & DICT_FTS) {
row_fts_psort_info_destroy(psort_info, merge_info);
+ } else if (error != DB_SUCCESS || !online) {
+ /* Do not apply any online log. */
+ } else if (old_table != new_table) {
+ ut_ad(!sort_idx->online_log);
+ ut_ad(sort_idx->online_status
+ == ONLINE_INDEX_COMPLETE);
+ } else {
+ DEBUG_SYNC_C("row_log_apply_before");
+ error = row_log_apply(trx, sort_idx, table);
+ DEBUG_SYNC_C("row_log_apply_after");
}
if (error != DB_SUCCESS) {
- trx->error_key_num = i;
+ trx->error_key_num = key_numbers[i];
goto func_exit;
}
@@ -3082,7 +3708,7 @@ func_exit:
DBUG_EXECUTE_IF(
"ib_build_indexes_too_many_concurrent_trxs",
error = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = static_cast<db_err>(error););
+ trx->error_state = error;);
row_merge_file_destroy_low(tmpfd);
@@ -3097,5 +3723,45 @@ func_exit:
mem_free(merge_files);
os_mem_free_large(block, block_size);
+ DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
+
+ if (online && old_table == new_table && error != DB_SUCCESS) {
+ /* On error, flag all online secondary index creation
+ as aborted. */
+ for (i = 0; i < n_indexes; i++) {
+ ut_ad(!(indexes[i]->type & DICT_FTS));
+ ut_ad(*indexes[i]->name == TEMP_INDEX_PREFIX);
+ ut_ad(!dict_index_is_clust(indexes[i]));
+
+ /* Completed indexes should be dropped as
+ well, and indexes whose creation was aborted
+ should be dropped from the persistent
+ storage. However, at this point we can only
+ set some flags in the not-yet-published
+ indexes. These indexes will be dropped later
+ in row_merge_drop_indexes(), called by
+ rollback_inplace_alter_table(). */
+
+ switch (dict_index_get_online_status(indexes[i])) {
+ case ONLINE_INDEX_COMPLETE:
+ break;
+ case ONLINE_INDEX_CREATION:
+ rw_lock_x_lock(
+ dict_index_get_lock(indexes[i]));
+ row_log_abort_sec(indexes[i]);
+ indexes[i]->type |= DICT_CORRUPT;
+ rw_lock_x_unlock(
+ dict_index_get_lock(indexes[i]));
+ new_table->drop_aborted = TRUE;
+ /* fall through */
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ case ONLINE_INDEX_ABORTED:
+ MONITOR_MUTEX_INC(
+ &dict_sys->mutex,
+ MONITOR_BACKGROUND_DROP_INDEX);
+ }
+ }
+ }
+
return(error);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index f1811a664c2..f748bb4f60f 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -30,6 +30,9 @@ Created 9/17/2000 Heikki Tuuri
#include "row0mysql.ic"
#endif
+#include <debug_sync.h>
+#include <my_dbug.h>
+
#include "row0ins.h"
#include "row0merge.h"
#include "row0sel.h"
@@ -42,6 +45,7 @@ Created 9/17/2000 Heikki Tuuri
#include "dict0load.h"
#include "dict0boot.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "trx0roll.h"
#include "trx0purge.h"
#include "trx0rec.h"
@@ -54,16 +58,16 @@ Created 9/17/2000 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "srv0mon.h"
+#include "srv0start.h"
+#include "row0import.h"
+#include "m_string.h"
+#include "my_sys.h"
/** Provide optional 4.x backwards compatibility for 5.0 and above */
UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
/** Chain node of the list of tables to drop in the background. */
-typedef struct row_mysql_drop_struct row_mysql_drop_t;
-
-/** Chain node of the list of tables to drop in the background. */
-struct row_mysql_drop_struct{
+struct row_mysql_drop_t{
char* table_name; /*!< table name */
UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
/*!< list chain node */
@@ -82,7 +86,7 @@ more. Protected by row_drop_list_mutex. */
static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
/** Mutex protecting the background table drop list. */
-static mutex_t row_drop_list_mutex;
+static ib_mutex_t row_drop_list_mutex;
/** Flag: has row_mysql_drop_list been initialized? */
static ibool row_mysql_drop_list_inited = FALSE;
@@ -570,21 +574,21 @@ next_column:
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
+@return true if it was a lock wait and we should continue running the
query thread and in that case the thr is ALREADY in the running state. */
UNIV_INTERN
-ibool
+bool
row_mysql_handle_errors(
/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
+ dberr_t* new_err,/*!< out: possible new error encountered in
lock wait, or if no new error, the value
of trx->error_state at the entry of this
function */
trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept) /*!< in: savepoint or NULL */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
{
- ulint err;
+ dberr_t err;
handle_new_error:
err = trx->error_state;
@@ -612,6 +616,7 @@ handle_new_error:
case DB_READ_ONLY:
case DB_FTS_INVALID_DOCID:
case DB_INTERRUPTED:
+ case DB_DICT_CHANGED:
if (savept) {
/* Roll back the latest, possibly incomplete
insertion or update */
@@ -631,7 +636,7 @@ handle_new_error:
*new_err = err;
- return(TRUE);
+ return(true);
case DB_DEADLOCK:
case DB_LOCK_TABLE_FULL:
@@ -648,6 +653,7 @@ handle_new_error:
" a new data file to\n"
"InnoDB: my.cnf and restart the database.\n", stderr);
+ ut_ad(0);
exit(1);
case DB_CORRUPTION:
@@ -686,7 +692,7 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
- return(FALSE);
+ return(false);
}
/********************************************************************//**
@@ -774,7 +780,7 @@ row_create_prebuilt(
prebuilt->clust_ref = ref;
- prebuilt->autoinc_error = 0;
+ prebuilt->autoinc_error = DB_SUCCESS;
prebuilt->autoinc_offset = 0;
/* Default to 1, we will set the actual value later in
@@ -883,7 +889,7 @@ row_prebuilt_free(
mem_free(base);
}
- dict_table_close(prebuilt->table, dict_locked);
+ dict_table_close(prebuilt->table, dict_locked, TRUE);
mem_heap_free(prebuilt->heap);
}
@@ -950,44 +956,62 @@ row_get_prebuilt_insert_row(
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
- ins_node_t* node;
- dtuple_t* row;
- dict_table_t* table = prebuilt->table;
+ dict_table_t* table = prebuilt->table;
ut_ad(prebuilt && table && prebuilt->trx);
- if (prebuilt->ins_node == NULL) {
-
- /* Not called before for this handle: create an insert node
- and query graph to the prebuilt struct */
+ if (prebuilt->ins_node != 0) {
- node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+ /* Check if indexes have been dropped or added and we
+ may need to rebuild the row insert template. */
- prebuilt->ins_node = node;
+ if (prebuilt->trx_id == table->def_trx_id
+ && UT_LIST_GET_LEN(prebuilt->ins_node->entry_list)
+ == UT_LIST_GET_LEN(table->indexes)) {
- if (prebuilt->ins_upd_rec_buff == NULL) {
- prebuilt->ins_upd_rec_buff = static_cast<byte*>(
- mem_heap_alloc(
- prebuilt->heap,
- prebuilt->mysql_row_len));
+ return(prebuilt->ins_node->row);
}
- row = dtuple_create(prebuilt->heap,
- dict_table_get_n_cols(table));
+ ut_ad(prebuilt->trx_id < table->def_trx_id);
- dict_table_copy_types(row, table);
+ que_graph_free_recursive(prebuilt->ins_graph);
- ins_node_set_new_row(node, row);
+ prebuilt->ins_graph = 0;
+ }
- prebuilt->ins_graph = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(
- node,
- prebuilt->trx, prebuilt->heap)));
+ /* Create an insert node and query graph to the prebuilt struct */
- prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+ ins_node_t* node;
+
+ node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+
+ prebuilt->ins_node = node;
+
+ if (prebuilt->ins_upd_rec_buff == 0) {
+ prebuilt->ins_upd_rec_buff = static_cast<byte*>(
+ mem_heap_alloc(
+ prebuilt->heap,
+ prebuilt->mysql_row_len));
}
+ dtuple_t* row;
+
+ row = dtuple_create(prebuilt->heap, dict_table_get_n_cols(table));
+
+ dict_table_copy_types(row, table);
+
+ ins_node_set_new_row(node, row);
+
+ prebuilt->ins_graph = static_cast<que_fork_t*>(
+ que_node_get_parent(
+ pars_complete_graph_for_exec(
+ node,
+ prebuilt->trx, prebuilt->heap)));
+
+ prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+
+ prebuilt->trx_id = table->def_trx_id;
+
return(prebuilt->ins_node->row);
}
@@ -1000,23 +1024,41 @@ row_update_statistics_if_needed(
/*============================*/
dict_table_t* table) /*!< in: table */
{
- ulint counter;
+ ib_uint64_t counter;
+ ib_uint64_t n_rows;
+
+ if (!table->stat_initialized) {
+ DBUG_EXECUTE_IF(
+ "test_upd_stats_if_needed_not_inited",
+ fprintf(stderr, "test_upd_stats_if_needed_not_inited "
+ "was executed\n");
+ );
+ return;
+ }
- counter = table->stat_modified_counter;
+ counter = table->stat_modified_counter++;
+ n_rows = dict_table_get_n_rows(table);
- table->stat_modified_counter = counter + 1;
+ if (dict_stats_is_persistent_enabled(table)) {
+ if (counter > n_rows / 10 /* 10% */
+ && dict_stats_auto_recalc_is_enabled(table)) {
+
+ dict_stats_recalc_pool_add(table);
+ table->stat_modified_counter = 0;
+ }
+ return;
+ }
/* Calculate new statistics if 1 / 16 of table has been modified
- since the last time a statistics batch was run, or if
- stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
+ since the last time a statistics batch was run.
We calculate statistics at most every 16th round, since we may have
a counter table which is very small and updated very often. */
- if (counter > 2000000000
- || ((ib_int64_t) counter > 16 + table->stat_n_rows / 16)) {
+ if (counter > 16 + n_rows / 16 /* 6.25% */) {
ut_ad(!mutex_own(&dict_sys->mutex));
- dict_stats_update(table, DICT_STATS_FETCH, FALSE);
+ /* this will reset table->stat_modified_counter to 0 */
+ dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
}
}
@@ -1028,7 +1070,7 @@ It is not compatible with another AUTO_INC or exclusive lock on the
table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
@@ -1038,7 +1080,7 @@ row_lock_table_autoinc_for_mysql(
ins_node_t* node = prebuilt->ins_node;
const dict_table_t* table = prebuilt->table;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
ut_ad(trx);
@@ -1053,10 +1095,8 @@ row_lock_table_autoinc_for_mysql(
trx->op_info = "setting auto-inc lock";
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
/* We use the insert query graph as the dummy graph needed
in the lock module call */
@@ -1076,7 +1116,7 @@ run_again:
err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
@@ -1089,21 +1129,21 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Sets a table lock on the table mentioned in prebuilt.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_for_mysql(
/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
@@ -1117,7 +1157,7 @@ row_lock_table_for_mysql(
{
trx_t* trx = prebuilt->trx;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
ut_ad(trx);
@@ -1157,7 +1197,7 @@ run_again:
thr);
}
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
@@ -1170,21 +1210,21 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Does an insert for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_insert_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: row in the MySQL format */
@@ -1193,7 +1233,7 @@ row_insert_for_mysql(
{
trx_savept_t savept;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
trx_t* trx = prebuilt->trx;
ins_node_t* node = prebuilt->ins_node;
@@ -1201,24 +1241,23 @@ row_insert_for_mysql(
ut_ad(trx);
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
+ if (dict_table_is_discarded(prebuilt->table)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The table %s doesn't have a corresponding "
+ "tablespace, it was discarded.",
prebuilt->table->name);
- return(DB_ERROR);
- }
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ ".ibd file is missing for table %s",
+ prebuilt->table->name);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
@@ -1229,9 +1268,7 @@ row_insert_for_mysql(
mem_analyze_corruption(prebuilt);
ut_error;
- }
-
- if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
+ } else if (srv_created_new_raw || srv_force_recovery) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
"InnoDB: database modifications by the user. Shut down\n"
@@ -1249,10 +1286,8 @@ row_insert_for_mysql(
trx_start_if_not_started_xa(trx);
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
@@ -1290,12 +1325,14 @@ error_exit:
thr->lock_state = QUE_THR_LOCK_NOLOCK;
if (was_lock_wait) {
+ ut_ad(node->state == INS_NODE_INSERT_ENTRIES
+ || node->state == INS_NODE_ALLOC_ROW_ID);
goto run_again;
}
trx->op_info = "";
- return((int) err);
+ return(err);
}
if (dict_table_has_fts_index(table)) {
@@ -1353,19 +1390,18 @@ error_exit:
que_thr_stop_for_mysql_no_error(thr, trx);
- table->stat_n_rows++;
+ srv_stats.n_rows_inserted.add((size_t)trx->id, 1);
- srv_n_rows_inserted++;
-
- if (prebuilt->table->stat_n_rows == 0) {
- /* Avoid wrap-over */
- table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_inc(table);
row_update_statistics_if_needed(table);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -1490,7 +1526,7 @@ row_fts_do_update(
Handles FTS matters for an update or a delete.
NOTE: should not be called if the table does not have an FTS index. .*/
static
-ulint
+dberr_t
row_fts_update_or_delete(
/*=====================*/
row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
@@ -1530,16 +1566,18 @@ void
init_fts_doc_id_for_ref(
/*====================*/
dict_table_t* table, /*!< in: table */
- ulint depth) /*!< in: recusive call depth */
+ ulint* depth) /*!< in: recusive call depth */
{
dict_foreign_t* foreign;
foreign = UT_LIST_GET_FIRST(table->referenced_list);
- depth++;
+ table->fk_max_recusive_level = 0;
+
+ (*depth)++;
/* Limit on tables involved in cascading delete/update */
- if (depth > FK_MAX_CASCADE_DEL) {
+ if (*depth > FK_MAX_CASCADE_DEL) {
return;
}
@@ -1563,7 +1601,7 @@ init_fts_doc_id_for_ref(
Does an update or delete of a row for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_update_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: the row to be updated, in
@@ -1572,7 +1610,7 @@ row_update_for_mysql(
handle */
{
trx_savept_t savept;
- ulint err;
+ dberr_t err;
que_thr_t* thr;
ibool was_lock_wait;
dict_index_t* clust_index;
@@ -1580,6 +1618,7 @@ row_update_for_mysql(
upd_node_t* node;
dict_table_t* table = prebuilt->table;
trx_t* trx = prebuilt->trx;
+ ulint fk_depth = 0;
ut_ad(prebuilt && trx);
UT_NOT_USED(mysql_rec);
@@ -1626,14 +1665,26 @@ row_update_for_mysql(
return(DB_ERROR);
}
+ DEBUG_SYNC_C("innodb_row_update_for_mysql_begin");
+
trx->op_info = "updating or deleting";
row_mysql_delay_if_needed();
- init_fts_doc_id_for_ref(table, 0);
-
trx_start_if_not_started_xa(trx);
+ if (dict_table_is_referenced_by_foreign_key(table)) {
+ /* Share lock the data dictionary to prevent any
+ table dictionary (for foreign constraint) change.
+ This is similar to row_ins_check_foreign_constraint
+ check protect by the dictionary lock as well.
+ In the future, this can be removed once the Foreign
+ key MDL is implemented */
+ row_mysql_freeze_data_dictionary(trx);
+ init_fts_doc_id_for_ref(table, &fk_depth);
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
node = prebuilt->upd_node;
clust_index = dict_table_get_first_index(table);
@@ -1683,10 +1734,13 @@ run_again:
trx->error_state = DB_SUCCESS;
trx->op_info = "";
- return((int) err);
+ return(err);
}
thr->lock_state= QUE_THR_LOCK_ROW;
+
+ DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
+
was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
&savept);
thr->lock_state= QUE_THR_LOCK_NOLOCK;
@@ -1697,7 +1751,7 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -1707,18 +1761,20 @@ run_again:
err = row_fts_update_or_delete(prebuilt);
if (err != DB_SUCCESS) {
trx->op_info = "";
- return((int) err);
+ return(err);
}
}
if (node->is_delete) {
- if (prebuilt->table->stat_n_rows > 0) {
- prebuilt->table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_dec(prebuilt->table);
- srv_n_rows_deleted++;
+ srv_stats.n_rows_deleted.add((size_t)trx->id, 1);
} else {
- srv_n_rows_updated++;
+ srv_stats.n_rows_updated.add((size_t)trx->id, 1);
}
/* We update table statistics only if it is a DELETE or UPDATE
@@ -1730,7 +1786,7 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -1744,7 +1800,7 @@ prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
releases the latest clustered index record lock we set.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+void
row_unlock_for_mysql(
/*=================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
@@ -1770,8 +1826,7 @@ row_unlock_for_mysql(
"InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
"InnoDB: this session is not using"
" READ COMMITTED isolation level.\n");
-
- return(DB_SUCCESS);
+ return;
}
trx->op_info = "unlock_row";
@@ -1863,15 +1918,13 @@ no_unlock:
}
trx->op_info = "";
-
- return(DB_SUCCESS);
}
/**********************************************************************//**
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_update_cascade_for_mysql(
/*=========================*/
que_thr_t* thr, /*!< in: query thread */
@@ -1879,7 +1932,7 @@ row_update_cascade_for_mysql(
or set null operation */
dict_table_t* table) /*!< in: table where we do the operation */
{
- ulint err;
+ dberr_t err;
trx_t* trx;
trx = thr_get_trx(thr);
@@ -1890,12 +1943,14 @@ row_update_cascade_for_mysql(
thr->fk_cascade_depth++;
if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
- return (DB_FOREIGN_EXCEED_MAX_CASCADE);
+ return(DB_FOREIGN_EXCEED_MAX_CASCADE);
}
run_again:
thr->run_node = node;
thr->prev_node = node;
+ DEBUG_SYNC_C("foreign_constraint_update_cascade");
+
row_upd_step(thr);
/* The recursive call for cascading update/delete happens
@@ -1937,13 +1992,15 @@ run_again:
}
if (node->is_delete) {
- if (table->stat_n_rows > 0) {
- table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_dec(table);
- srv_n_rows_deleted++;
+ srv_stats.n_rows_deleted.add((size_t)trx->id, 1);
} else {
- srv_n_rows_updated++;
+ srv_stats.n_rows_updated.add((size_t)trx->id, 1);
}
row_update_statistics_if_needed(table);
@@ -1981,7 +2038,7 @@ row_mysql_freeze_data_dictionary_func(
{
ut_a(trx->dict_operation_lock_mode == 0);
- rw_lock_s_lock_func(&dict_operation_lock, 0, file, line);
+ rw_lock_s_lock_inline(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_S_LATCH;
}
@@ -1994,6 +2051,8 @@ row_mysql_unfreeze_data_dictionary(
/*===============================*/
trx_t* trx) /*!< in/out: transaction */
{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+
ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
rw_lock_s_unlock(&dict_operation_lock);
@@ -2018,7 +2077,7 @@ row_mysql_lock_data_dictionary_func(
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks or lock waits can occur then in these operations */
- rw_lock_x_lock_func(&dict_operation_lock, 0, file, line);
+ rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_X_LATCH;
mutex_enter(&(dict_sys->mutex));
@@ -2032,6 +2091,8 @@ row_mysql_unlock_data_dictionary(
/*=============================*/
trx_t* trx) /*!< in/out: transaction */
{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
/* Serialize data dictionary operations with dictionary mutex:
@@ -2052,19 +2113,21 @@ InnoDB will try to invoke mem_validate(). On failure the transaction will
be rolled back and the 'table' object will be freed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_table_for_mysql(
/*=======================*/
dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx) /*!< in: transaction handle */
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true, commit the transaction */
{
tab_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
const char* table_name;
ulint table_name_len;
- ulint err;
+ dberr_t err;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -2072,6 +2135,11 @@ row_create_table_for_mysql(
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_start_of_row_create_table_for_mysql",
+ goto err_exit;
+ );
+
if (srv_created_new_raw) {
fputs("InnoDB: A new raw disk partition was initialized:\n"
"InnoDB: we do not allow database modifications"
@@ -2080,7 +2148,10 @@ row_create_table_for_mysql(
" is replaced with raw.\n", stderr);
err_exit:
dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
+
+ if (commit) {
+ trx_commit_for_mysql(trx);
+ }
return(DB_ERROR);
}
@@ -2117,23 +2188,23 @@ err_exit:
/* The lock timeout monitor thread also takes care
of InnoDB monitor prints */
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_lock_monitor)) {
srv_print_innodb_monitor = TRUE;
srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_tablespace_monitor)) {
srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_table_monitor)) {
srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
#ifdef UNIV_MEM_DEBUG
} else if (STR_EQ(table_name, table_name_len,
S_innodb_mem_validate)) {
@@ -2152,12 +2223,21 @@ err_exit:
#endif /* UNIV_MEM_DEBUG */
}
-
heap = mem_heap_create(512);
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be creating auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name, "/FTS_") != NULL);
+ }
- node = tab_create_graph_create(table, heap);
+ node = tab_create_graph_create(table, heap, commit);
thr = pars_complete_graph_for_exec(node, trx, heap);
@@ -2168,6 +2248,29 @@ err_exit:
err = trx->error_state;
+ if (table->space != TRX_SYS_SPACE) {
+ ut_a(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE));
+
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if a new
+ tablespace was created. */
+ if (err == DB_SUCCESS) {
+ char* path;
+ path = fil_space_get_first_path(table->space);
+
+ err = dict_create_add_tablespace_to_dictionary(
+ table->space, table->name,
+ fil_space_get_flags(table->space),
+ path, trx, commit);
+
+ mem_free(path);
+ }
+
+ if (err != DB_SUCCESS) {
+ /* We must delete the link file. */
+ fil_delete_link_file(table->name);
+ }
+ }
+
switch (err) {
case DB_SUCCESS:
break;
@@ -2181,8 +2284,8 @@ err_exit:
ut_print_name(stderr, trx, TRUE, table->name);
fputs(" because tablespace full\n", stderr);
- if (dict_table_open_on_name_no_stats(
- table->name, FALSE, DICT_ERR_IGNORE_NONE)) {
+ if (dict_table_open_on_name(table->name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE)) {
/* Make things easy for the drop table code. */
@@ -2190,10 +2293,13 @@ err_exit:
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, FALSE);
+ dict_table_close(table, TRUE, FALSE);
row_drop_table_for_mysql(table->name, trx, FALSE);
- trx_commit_for_mysql(trx);
+
+ if (commit) {
+ trx_commit_for_mysql(trx);
+ }
} else {
dict_mem_table_free(table);
}
@@ -2203,7 +2309,12 @@ err_exit:
case DB_TOO_MANY_CONCURRENT_TRXS:
/* We already have .ibd file here. it should be deleted. */
- if (table->space && !fil_delete_tablespace(table->space)) {
+ if (table->space
+ && fil_delete_tablespace(
+ table->space,
+ BUF_REMOVE_FLUSH_NO_WRITE)
+ != DB_SUCCESS) {
+
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: not able to"
@@ -2215,10 +2326,8 @@ err_exit:
/* fall through */
case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
default:
- /* We may also get err == DB_ERROR if the .ibd file for the
- table already exists */
-
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
dict_mem_table_free(table);
@@ -2229,7 +2338,7 @@ err_exit:
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2238,7 +2347,7 @@ to create an index results in dropping the whole table! This is no problem
currently as all indexes must be created at the same time as the table.
@return error number or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_index_for_mysql(
/*=======================*/
dict_index_t* index, /*!< in, own: index definition
@@ -2254,13 +2363,13 @@ row_create_index_for_mysql(
ind_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ulint i;
ulint len;
char* table_name;
char* index_name;
dict_table_t* table;
- ibool is_fts = FALSE;
+ ibool is_fts;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -2277,8 +2386,8 @@ row_create_index_for_mysql(
is_fts = (index->type == DICT_FTS);
- table = dict_table_open_on_name_no_stats(table_name, TRUE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(table_name, TRUE, TRUE,
+ DICT_ERR_IGNORE_NONE);
trx_start_if_not_started_xa(trx);
@@ -2292,6 +2401,11 @@ row_create_index_for_mysql(
len = ut_max(len, field_lengths[i]);
}
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_create_index",
+ len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1;
+ );
+
/* Column or prefix length exceeds maximum column length */
if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
err = DB_TOO_BIG_INDEX_COL;
@@ -2308,7 +2422,7 @@ row_create_index_for_mysql(
/* Note that the space id where we store the index is inherited from
the table in dict_build_index_def_step() in dict0crea.cc. */
- node = ind_create_graph_create(index, heap);
+ node = ind_create_graph_create(index, heap, true);
thr = pars_complete_graph_for_exec(node, trx, heap);
@@ -2332,7 +2446,7 @@ row_create_index_for_mysql(
}
error_handling:
- dict_table_close(table, TRUE);
+ dict_table_close(table, TRUE, FALSE);
if (err != DB_SUCCESS) {
/* We have special error handling here */
@@ -2353,7 +2467,7 @@ error_handling:
mem_free(table_name);
mem_free(index_name);
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2366,7 +2480,7 @@ fields than mentioned in the constraint. Check also that foreign key
constraints which reference this table are ok.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_table_add_foreign_constraints(
/*==============================*/
trx_t* trx, /*!< in: transaction */
@@ -2383,7 +2497,7 @@ row_table_add_foreign_constraints(
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
- ulint err;
+ dberr_t err;
ut_ad(mutex_own(&(dict_sys->mutex)));
#ifdef UNIV_SYNC_DEBUG
@@ -2399,6 +2513,12 @@ row_table_add_foreign_constraints(
err = dict_create_foreign_constraints(trx, sql_string, sql_length,
name, reject_fks);
+
+ DBUG_EXECUTE_IF("ib_table_add_foreign_fail",
+ err = DB_DUPLICATE_KEY;);
+
+ DEBUG_SYNC_C("table_add_foreign_constraints");
+
if (err == DB_SUCCESS) {
/* Check that also referencing constraints are ok */
err = dict_load_foreigns(name, FALSE, TRUE);
@@ -2418,7 +2538,7 @@ row_table_add_foreign_constraints(
trx->error_state = DB_SUCCESS;
}
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2430,12 +2550,12 @@ as a background operation, which is taken care of by the master thread
in srv0srv.cc.
@return error code or DB_SUCCESS */
static
-int
+dberr_t
row_drop_table_for_mysql_in_background(
/*===================================*/
const char* name) /*!< in: table name */
{
- ulint error;
+ dberr_t error;
trx_t* trx;
trx = trx_allocate_for_background();
@@ -2464,7 +2584,7 @@ row_drop_table_for_mysql_in_background(
trx_free_for_background(trx);
- return((int) error);
+ return(error);
}
/*********************************************************************//**
@@ -2498,8 +2618,8 @@ loop:
return(n_tables + n_tables_dropped);
}
- table = dict_table_open_on_name_no_stats(drop->table_name, FALSE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (table == NULL) {
/* If for some reason the table has already been dropped
@@ -2510,7 +2630,7 @@ loop:
ut_a(!table->can_be_evicted);
- dict_table_close(table, FALSE);
+ dict_table_close(table, FALSE, FALSE);
if (DB_SUCCESS != row_drop_table_for_mysql_in_background(
drop->table_name)) {
@@ -2617,356 +2737,429 @@ row_add_table_to_background_drop_list(
}
/*********************************************************************//**
-Discards the tablespace of a table which stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
+Reassigns the table identifier of a table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
- const char* name, /*!< in: table name */
- trx_t* trx) /*!< in: transaction handle */
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t* new_id) /*!< out: new table id */
{
- dict_foreign_t* foreign;
- table_id_t new_id;
- dict_table_t* table;
- ibool success;
- ulint err;
- pars_info_t* info = NULL;
+ dberr_t err;
+ pars_info_t* info = pars_info_create();
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
+ dict_hdr_get_new_id(new_id, NULL, NULL);
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do DISCARD
- TABLESPACE. Then there are no running queries on the table.
+ /* Remove all locks except the table-level S and X locks. */
+ lock_remove_all_on_table(table, FALSE);
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
+ pars_info_add_ull_literal(info, "old_id", table->id);
+ pars_info_add_ull_literal(info, "new_id", *new_id);
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_TABLE_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET ID = :new_id\n"
+ " WHERE ID = :old_id;\n"
+ "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "END;\n", FALSE, trx);
- 3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree; as long as the tablespace mem object
- does not exist, ongoing insert buffer page merges are
- discarded in buf0rea.cc. If we recreate the tablespace mem
- object with IMPORT TABLESPACE later, then the tablespace will
- have the same id, but the tablespace_version field in the mem
- object is different, and ongoing old insert buffer page merges
- get discarded.
+ return(err);
+}
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations.
+/*********************************************************************//**
+Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction,
+acquire the data dictionary lock in X mode and open the table.
+@return table instance or 0 if not found. */
+static
+dict_table_t*
+row_discard_tablespace_begin(
+/*=========================*/
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ trx->op_info = "discarding tablespace";
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- discard. We also reserve the data dictionary latch. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->op_info = "discarding tablespace";
trx_start_if_not_started_xa(trx);
/* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
+ this is to avoid deadlocks during data dictionary operations */
row_mysql_lock_data_dictionary(trx);
- table = dict_table_open_on_name_no_stats(name, TRUE,
- DICT_ERR_IGNORE_NONE);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
-
- goto funct_exit;
- }
+ dict_table_t* table;
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be discarded\n", stderr);
- err = DB_ERROR;
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
- goto funct_exit;
+ if (table) {
+ dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ ut_a(table->space != TRX_SYS_SPACE);
+ ut_a(table->n_foreign_key_checks_running == 0);
}
- if (table->n_foreign_key_checks_running > 0) {
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to DISCARD table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there is a foreign key check"
- " running on it.\n"
- "InnoDB: Cannot discard the table.\n",
- stderr);
-
- err = DB_ERROR;
+ return(table);
+}
- goto funct_exit;
- }
+/*********************************************************************//**
+Do the foreign key constraint checks.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace_foreign_key_checks(
+/*======================================*/
+ const trx_t* trx, /*!< in: transaction handle */
+ const dict_table_t* table) /*!< in: table to be discarded */
+{
+ const dict_foreign_t* foreign;
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign && foreign->foreign_table == table;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- if (foreign && trx->check_foreigns) {
+ if (!srv_read_only_mode && foreign && trx->check_foreigns) {
FILE* ef = dict_foreign_err_file;
/* We only allow discarding a referenced table if
FOREIGN_KEY_CHECKS is set to 0 */
- err = DB_CANNOT_DROP_CONSTRAINT;
-
mutex_enter(&dict_foreign_err_mutex);
+
rewind(ef);
+
ut_print_timestamp(ef);
fputs(" Cannot DISCARD table ", ef);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, table->name);
fputs("\n"
"because it is referenced by ", ef);
ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name);
putc('\n', ef);
+
mutex_exit(&dict_foreign_err_mutex);
- goto funct_exit;
+ return(DB_CANNOT_DROP_CONSTRAINT);
}
- dict_hdr_get_new_id(&new_id, NULL, NULL);
+ return(DB_SUCCESS);
+}
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
+/*********************************************************************//**
+Cleanup after the DISCARD TABLESPACE operation.
+@return error code. */
+static
+dberr_t
+row_discard_tablespace_end(
+/*=======================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table, /*!< in/out: table to be discarded */
+ dberr_t err) /*!< in: error code */
+{
+ if (table != 0) {
+ dict_table_close(table, TRUE, FALSE);
+ }
- info = pars_info_create();
+ DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ DBUG_SUICIDE(););
- pars_info_add_str_literal(info, "table_name", name);
- pars_info_add_ull_literal(info, "new_id", new_id);
+ trx_commit_for_mysql(trx);
- err = que_eval_sql(info,
- "PROCEDURE DISCARD_TABLESPACE_PROC () IS\n"
- "old_id CHAR;\n"
- "BEGIN\n"
- "SELECT ID INTO old_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " COMMIT WORK;\n"
- " RETURN;\n"
- "END IF;\n"
- "UPDATE SYS_TABLES SET ID = :new_id\n"
- " WHERE ID = old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
+ DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ DBUG_SUICIDE(););
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Do the DISCARD TABLESPACE operation.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table) /*!< in/out: table to be discarded */
+{
+ dberr_t err;
+
+ /* How do we prevent crashes caused by ongoing operations on
+ the table? Old operations could try to access non-existent
+ pages. MySQL will block all DML on the table using MDL and a
+ DISCARD will not start unless all existing operations on the
+ table to be discarded are completed.
+
+ 1) Acquire the data dictionary latch in X mode. To prevent any
+ internal operations that MySQL is not aware off and also for
+ the internal SQL parser.
+
+ 2) Purge and rollback: we assign a new table id for the
+ table. Since purge and rollback look for the table based on
+ the table id, they see the table as 'dropped' and discard
+ their operations.
+
+ 3) Insert buffer: we remove all entries for the tablespace in
+ the insert buffer tree.
+
+ 4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
+ we do not allow the discard. */
+
+ /* Play safe and remove all insert buffer entries, though we should
+ have removed them already when DISCARD TABLESPACE was called */
+
+ ibuf_delete_for_discarded_space(table->space);
+
+ table_id_t new_id;
+
+ /* Set the TABLESPACE DISCARD flag in the table definition on disk. */
+
+ err = row_import_update_discarded_flag(trx, table->id, true, true);
if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- dict_table_change_id_in_cache(table, new_id);
+ return(err);
+ }
- success = fil_discard_tablespace(table->space);
+ /* Update the index root pages in the system tables, on disk */
- if (!success) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
+ err = row_import_update_index_root(trx, table, true, true);
- err = DB_ERROR;
- } else {
- /* Set the flag which tells that now it is legal to
- IMPORT a tablespace for this table */
- table->tablespace_discarded = TRUE;
- table->ibd_file_missing = TRUE;
- }
+ if (err != DB_SUCCESS) {
+ return(err);
}
-funct_exit:
+ /* Drop all the FTS auxiliary tables. */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- if (table != NULL) {
- dict_table_close(table, TRUE);
+ fts_drop_tables(trx, table);
}
- trx_commit_for_mysql(trx);
+ /* Assign a new space ID to the table definition so that purge
+ can ignore the changes. Update the system table on disk. */
- row_mysql_unlock_data_dictionary(trx);
+ err = row_mysql_table_id_reassign(table, trx, &new_id);
- trx->op_info = "";
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
- return((int) err);
+ /* Discard the physical file that is used for the tablespace. */
+
+ err = fil_discard_tablespace(table->space);
+
+ switch(err) {
+ case DB_SUCCESS:
+ case DB_IO_ERROR:
+ case DB_TABLESPACE_NOT_FOUND:
+ /* All persistent operations successful, update the
+ data dictionary memory cache. */
+
+ table->ibd_file_missing = TRUE;
+
+ table->flags2 |= DICT_TF2_DISCARDED;
+
+ dict_table_change_id_in_cache(table, new_id);
+
+ /* Reset the root page numbers. */
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != 0;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ index->page = FIL_NULL;
+ index->space = FIL_NULL;
+ }
+
+ /* If the tablespace did not already exist or we couldn't
+ write to it, we treat that as a successful DISCARD. It is
+ unusable anyway. */
+
+ err = DB_SUCCESS;
+ break;
+
+ default:
+ /* We need to rollback the disk changes, something failed. */
+
+ trx->error_state = DB_SUCCESS;
+
+ trx_rollback_to_savepoint(trx, NULL);
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ return(err);
}
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
+/*********************************************************************//**
+Discards the tablespace of a table which stored in an .ibd file. Discarding
+means that this function renames the .ibd file and assigns a new table id for
+the table. Also the flag table->ibd_file_missing is set to TRUE.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
-row_import_tablespace_for_mysql(
-/*============================*/
+dberr_t
+row_discard_tablespace_for_mysql(
+/*=============================*/
const char* name, /*!< in: table name */
trx_t* trx) /*!< in: transaction handle */
{
+ dberr_t err;
dict_table_t* table;
- ibool success;
- lsn_t current_lsn;
- ulint err = DB_SUCCESS;
- trx_start_if_not_started_xa(trx);
+ /* Open the table and start the transaction if not started. */
- trx->op_info = "importing tablespace";
+ table = row_discard_tablespace_begin(name, trx);
- current_lsn = log_get_lsn();
+ if (table == 0) {
+ err = DB_TABLE_NOT_FOUND;
+ } else if (table->space == TRX_SYS_SPACE) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
- /* It is possible, though very improbable, that the lsn's in the
- tablespace to be imported have risen above the current system lsn, if
- a lengthy purge, ibuf merge, or rollback was performed on a backup
- taken with ibbackup. If that is the case, reset page lsn's in the
- file. We assume that mysqld was shut down after it performed these
- cleanup operations on the .ibd file, so that it stamped the latest lsn
- to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file.
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
- TODO: reset also the trx id's in clustered index records and write
- a new space id to each data page. That would allow us to import clean
- .ibd files from another MySQL installation. */
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
- success = fil_reset_too_high_lsns(name, current_lsn);
+ err = DB_ERROR;
- if (!success) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: cannot reset lsn's in table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
+ } else if (table->n_foreign_key_checks_running > 0) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_DISCARD_FK_CHECKS_RUNNING, table_name);
err = DB_ERROR;
- row_mysql_lock_data_dictionary(trx);
- table = NULL;
+ } else {
+ /* Do foreign key constraint checks. */
- goto funct_exit;
- }
+ err = row_discard_tablespace_foreign_key_checks(trx, table);
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
+ if (err == DB_SUCCESS) {
+ err = row_discard_tablespace(trx, table);
+ }
+ }
- row_mysql_lock_data_dictionary(trx);
+ return(row_discard_tablespace_end(trx, table, err));
+}
- table = dict_table_open_on_name_no_stats(name, TRUE,
- DICT_ERR_IGNORE_NONE);
+/*********************************************************************//**
+Sets an exclusive lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+{
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+ sel_node_t* node;
- if (!table) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: does not exist in the InnoDB data dictionary\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
+ ut_ad(trx);
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
- err = DB_TABLE_NOT_FOUND;
+ heap = mem_heap_create(512);
- goto funct_exit;
- }
+ trx->op_info = op_info;
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be imported\n", stderr);
- err = DB_ERROR;
+ node = sel_node_create(heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap);
+ thr->graph->state = QUE_FORK_ACTIVE;
- goto funct_exit;
- }
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
- if (!table->tablespace_discarded) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: you are trying to"
- " IMPORT a tablespace\n"
- "InnoDB: ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(", though you have not called DISCARD on it yet\n"
- "InnoDB: during the lifetime of the mysqld process!\n",
- stderr);
+ thr = que_fork_get_first_thr(
+ static_cast<que_fork_t*>(que_node_get_parent(thr)));
- err = DB_ERROR;
+ que_thr_move_to_run_state_for_mysql(thr, trx);
- goto funct_exit;
- }
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
- /* Play safe and remove all insert buffer entries, though we should
- have removed them already when DISCARD TABLESPACE was called */
+ err = lock_table(0, table, mode, thr);
- ibuf_delete_for_discarded_space(table->space);
+ trx->error_state = err;
- success = fil_open_single_table_tablespace(
- TRUE, table->space,
- dict_tf_to_fsp_flags(table->flags),
- table->name);
- if (success) {
- table->ibd_file_missing = FALSE;
- table->tablespace_discarded = FALSE;
+ if (err == DB_SUCCESS) {
+ que_thr_stop_for_mysql_no_error(thr, trx);
} else {
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot find or open in the"
- " database directory the .ibd file of\n"
- "InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
- }
+ que_thr_stop_for_mysql(thr);
- err = DB_ERROR;
- }
+ if (err != DB_QUE_THR_SUSPENDED) {
+ ibool was_lock_wait;
-funct_exit:
+ was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, NULL);
- if (table != NULL) {
- dict_table_close(table, TRUE);
- }
+ if (was_lock_wait) {
+ goto run_again;
+ }
+ } else {
+ que_thr_t* run_thr;
+ que_node_t* parent;
- trx_commit_for_mysql(trx);
+ parent = que_node_get_parent(thr);
- row_mysql_unlock_data_dictionary(trx);
+ run_thr = que_fork_start_command(
+ static_cast<que_fork_t*>(parent));
+
+ ut_a(run_thr == thr);
+
+ /* There was a lock wait but the thread was not
+ in a ready to run or running state. */
+ trx->error_state = DB_LOCK_WAIT;
+ goto run_again;
+ }
+ }
+
+ que_graph_free(thr->graph);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_truncate_table_for_mysql(
/*=========================*/
dict_table_t* table, /*!< in: table handle */
trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
- ulint err;
+ dberr_t err;
mem_heap_t* heap;
byte* buf;
dtuple_t* tuple;
@@ -2978,17 +3171,15 @@ row_truncate_table_for_mysql(
ulint recreate_space = 0;
pars_info_t* info = NULL;
ibool has_internal_doc_id;
+ ulint old_space = table->space;
/* How do we prevent crashes caused by ongoing operations on
the table? Old operations could try to access non-existent
pages.
1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table. This is
- guaranteed, because in ha_innobase::store_lock(), we do not
- weaken the TL_WRITE lock requested by MySQL when executing
- SQLCOM_TRUNCATE.
+ InnoDB table lock on the table before we can do TRUNCATE
+ TABLE. Then there are no running queries on the table.
2) Purge and rollback: we assign a new table id for the
table. Since purge and rollback look for the table based on
@@ -3031,9 +3222,15 @@ row_truncate_table_for_mysql(
return(DB_ERROR);
}
- trx->op_info = "truncating table";
+ if (dict_table_is_discarded(table)) {
+ return(DB_TABLESPACE_DELETED);
+ } else if (table->ibd_file_missing) {
+ return(DB_TABLESPACE_NOT_FOUND);
+ }
- trx_start_if_not_started_xa(trx);
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "truncating table";
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
@@ -3049,16 +3246,22 @@ row_truncate_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
+ dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign != 0 && foreign->foreign_table == table;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ /* Do nothing. */
}
- if (foreign && trx->check_foreigns) {
+ if (!srv_read_only_mode
+ && foreign
+ && trx->check_foreigns) {
+
FILE* ef = dict_foreign_err_file;
/* We only allow truncating a referenced table if
@@ -3099,19 +3302,41 @@ row_truncate_table_for_mysql(
goto funct_exit;
}
- /* Remove all locks except the table-level S and X locks. */
+ /* Remove all locks except the table-level X lock. */
lock_remove_all_on_table(table, FALSE);
+ /* Ensure that the table will be dropped by
+ trx_rollback_active() in case of a crash. */
+
trx->table_id = table->id;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ /* Assign an undo segment for the transaction, so that the
+ transaction will be recovered after a crash. */
+
+ mutex_enter(&trx->undo_mutex);
+
+ err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+ mutex_exit(&trx->undo_mutex);
+
+ if (err != DB_SUCCESS) {
+
+ goto funct_exit;
+ }
if (table->space && !table->dir_path_of_temp_table) {
/* Discard and create the single-table tablespace. */
ulint space = table->space;
ulint flags = fil_space_get_flags(space);
+ ut_a(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+
+ dict_get_and_save_data_dir_path(table, true);
+
if (flags != ULINT_UNDEFINED
- && fil_discard_tablespace(space)) {
+ && fil_discard_tablespace(space) == DB_SUCCESS) {
dict_index_t* index;
@@ -3124,15 +3349,18 @@ row_truncate_table_for_mysql(
if (space == ULINT_UNDEFINED
|| fil_create_new_single_table_tablespace(
- space, table->name, FALSE,
+ space, table->name,
+ table->data_dir_path,
flags, table->flags2,
- FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
+ FIL_IBD_FILE_INITIAL_SIZE)
+ != DB_SUCCESS) {
dict_table_x_unlock_indexes(table);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: TRUNCATE TABLE %s failed to"
- " create a new tablespace\n",
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "TRUNCATE TABLE %s failed to "
+ "create a new tablespace",
table->name);
+
table->ibd_file_missing = 1;
err = DB_ERROR;
goto funct_exit;
@@ -3240,7 +3468,6 @@ next_rec:
mtr_commit(&mtr);
mem_heap_free(heap);
-
/* Done with index truncation, release index tree locks,
subsequent work relates to table level metadata change */
dict_table_x_unlock_indexes(table);
@@ -3259,21 +3486,21 @@ next_rec:
fts_table.name = table->name;
fts_table.id = new_id;
- err = fts_create_common_tables(trx, &fts_table, table->name,
- TRUE);
+ err = fts_create_common_tables(
+ trx, &fts_table, table->name, TRUE);
- if (err == DB_SUCCESS) {
- for (i = 0; i < ib_vector_size(table->fts->indexes);
- i++) {
- dict_index_t* fts_index;
+ for (i = 0;
+ i < ib_vector_size(table->fts->indexes)
+ && err == DB_SUCCESS;
+ i++) {
- fts_index = static_cast<dict_index_t*>(
- ib_vector_getp(
- table->fts->indexes, i));
+ dict_index_t* fts_index;
- fts_create_index_tables_low(
- trx, fts_index, table->name, new_id);
- }
+ fts_index = static_cast<dict_index_t*>(
+ ib_vector_getp(table->fts->indexes, i));
+
+ err = fts_create_index_tables_low(
+ trx, fts_index, table->name, new_id);
}
if (err != DB_SUCCESS) {
@@ -3287,34 +3514,64 @@ next_rec:
fputs("\n", stderr);
goto funct_exit;
+ } else {
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
}
info = pars_info_create();
- pars_info_add_int4_literal(info, "space", (lint) table->space);
+ pars_info_add_int4_literal(info, "new_space", (lint) table->space);
pars_info_add_ull_literal(info, "old_id", table->id);
pars_info_add_ull_literal(info, "new_id", new_id);
err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
+ "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
"BEGIN\n"
"UPDATE SYS_TABLES"
- " SET ID = :new_id, SPACE = :space\n"
+ " SET ID = :new_id, SPACE = :new_space\n"
" WHERE ID = :old_id;\n"
"UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
"UPDATE SYS_INDEXES"
- " SET TABLE_ID = :new_id, SPACE = :space\n"
+ " SET TABLE_ID = :new_id, SPACE = :new_space\n"
" WHERE TABLE_ID = :old_id;\n"
- "COMMIT WORK;\n"
"END;\n"
, FALSE, trx);
+ if (err == DB_SUCCESS && old_space != table->space) {
+ info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "old_space", (lint) old_space);
+
+ pars_info_add_int4_literal(
+ info, "new_space", (lint) table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET SPACE = :new_space\n"
+ " WHERE SPACE = :old_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET SPACE = :new_space"
+ " WHERE SPACE = :old_space;\n"
+ "END;\n"
+ , FALSE, trx);
+ }
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_fts_truncate", err = DB_ERROR;);
+
if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
+
+ /* Update system table failed. Table in memory metadata
+ could be in an inconsistent state, mark the in-memory
+ table->corrupted to be true. In the long run, this should
+ be fixed by atomic truncate table */
+ table->corrupted = true;
+
ut_print_timestamp(stderr);
fputs(" InnoDB: Unable to assign a new identifier to table ",
stderr);
@@ -3323,30 +3580,40 @@ next_rec:
"InnoDB: after truncating it. Background processes"
" may corrupt the table!\n", stderr);
- /* Fail to update the table id, so drop the new
+ /* Failed to update the table id, so drop the new
FTS auxiliary tables */
if (has_internal_doc_id) {
- dict_table_t fts_table;
+ ut_ad(trx->state == TRX_STATE_NOT_STARTED);
+
+ table_id_t id = table->id;
- fts_table.name = table->name;
- fts_table.id = new_id;
+ table->id = new_id;
- fts_drop_tables(trx, &fts_table);
+ fts_drop_tables(trx, table);
+
+ table->id = id;
+
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
err = DB_ERROR;
} else {
/* Drop the old FTS index */
if (has_internal_doc_id) {
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
fts_drop_tables(trx, table);
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
+ DBUG_EXECUTE_IF("ib_truncate_crash_after_fts_drop",
+ DBUG_SUICIDE(););
+
dict_table_change_id_in_cache(table, new_id);
/* Reset the Doc ID in cache to 0 */
if (has_internal_doc_id && table->fts->cache) {
table->fts->fts_status |= TABLE_DICT_LOCKED;
- fts_update_next_doc_id(table, NULL, 0);
+ fts_update_next_doc_id(trx, table, NULL, 0);
fts_cache_clear(table->fts->cache, TRUE);
fts_cache_init(table->fts->cache);
table->fts->fts_status &= ~TABLE_DICT_LOCKED;
@@ -3364,16 +3631,13 @@ funct_exit:
row_mysql_unlock_data_dictionary(trx);
- /* We are supposed to recalc and save the stats only
- on ANALYZE, but it also makes sense to do so on TRUNCATE */
- dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT_SILENT,
- FALSE);
+ dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
trx->op_info = "";
srv_wake_master_thread();
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -3385,23 +3649,29 @@ by the transaction, the transaction will be committed. Otherwise, the
data dictionary will remain locked.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_table_for_mysql(
/*=====================*/
const char* name, /*!< in: table name */
trx_t* trx, /*!< in: transaction handle */
- ibool drop_db)/*!< in: TRUE=dropping whole database */
+ bool drop_db,/*!< in: true=dropping whole database */
+ bool nonatomic)
+ /*!< in: whether it is permitted
+ to release and reacquire dict_operation_lock */
{
+ dberr_t err;
dict_foreign_t* foreign;
dict_table_t* table;
- dict_index_t* index;
+ ibool print_msg;
ulint space_id;
- ulint err;
- const char* table_name;
+ char* filepath = NULL;
+ const char* tablename_minus_db;
+ char* tablename = NULL;
+ bool ibd_file_missing;
ulint namelen;
- ibool locked_dictionary = FALSE;
- ibool fts_bg_thread_exited = FALSE;
+ bool locked_dictionary = false;
pars_info_t* info = NULL;
+ mem_heap_t* heap = NULL;
ut_a(name != NULL);
@@ -3419,19 +3689,19 @@ row_drop_table_for_mysql(
Certain table names starting with 'innodb_' have their special
meaning regardless of the database name. Thus, we need to
ignore the database name prefix in the comparisons. */
- table_name = strchr(name, '/');
+ tablename_minus_db = strchr(name, '/');
- if (table_name) {
- table_name++;
+ if (tablename_minus_db) {
+ tablename_minus_db++;
} else {
/* Ancillary FTS tables don't have '/' characters. */
- table_name = name;
+ tablename_minus_db = name;
}
- namelen = strlen(table_name) + 1;
+ namelen = strlen(tablename_minus_db) + 1;
if (namelen == sizeof S_innodb_monitor
- && !memcmp(table_name, S_innodb_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_monitor,
sizeof S_innodb_monitor)) {
/* Table name equals "innodb_monitor":
@@ -3440,17 +3710,17 @@ row_drop_table_for_mysql(
srv_print_innodb_monitor = FALSE;
srv_print_innodb_lock_monitor = FALSE;
} else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(table_name, S_innodb_lock_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_lock_monitor,
sizeof S_innodb_lock_monitor)) {
srv_print_innodb_monitor = FALSE;
srv_print_innodb_lock_monitor = FALSE;
} else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(table_name, S_innodb_tablespace_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_tablespace_monitor,
sizeof S_innodb_tablespace_monitor)) {
srv_print_innodb_tablespace_monitor = FALSE;
} else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(table_name, S_innodb_table_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_table_monitor,
sizeof S_innodb_table_monitor)) {
srv_print_innodb_table_monitor = FALSE;
@@ -3461,7 +3731,10 @@ row_drop_table_for_mysql(
trx->op_info = "dropping table";
- trx_start_if_not_started(trx);
+ /* This function is called recursively via fts_drop_tables(). */
+ if (trx->state == TRX_STATE_NOT_STARTED) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ }
if (trx->dict_operation_lock_mode != RW_X_LATCH) {
/* Prevent foreign key checks etc. while we are dropping the
@@ -3469,17 +3742,17 @@ row_drop_table_for_mysql(
row_mysql_lock_data_dictionary(trx);
- locked_dictionary = TRUE;
+ locked_dictionary = true;
+ nonatomic = true;
}
-retry:
ut_ad(mutex_own(&(dict_sys->mutex)));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- table = dict_table_open_on_name_no_stats(
- name, TRUE,
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE,
static_cast<dict_err_ignore_t>(
DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
@@ -3502,34 +3775,53 @@ retry:
goto funct_exit;
}
- if (table->fts) {
- fts_t* fts = table->fts;
+ /* Turn on this drop bit before we could release the dictionary
+ latch */
+ table->to_be_dropped = true;
- /* It is possible that background 'Add' thread fts_add_thread()
- just gets called and the fts_optimize_thread()
- is processing deleted records. There could be undetected
- deadlock between threads synchronization and dict_sys_mutex
- since fts_parse_sql() requires dict_sys->mutex. Ask the
- background thread to exit before proceeds to drop table to
- avoid undetected deadlocks */
- row_mysql_unlock_data_dictionary(trx);
+ if (nonatomic) {
+ /* This trx did not acquire any locks on dictionary
+ table records yet. Thus it is safe to release and
+ reacquire the data dictionary latches. */
+ if (table->fts) {
+ ut_ad(!table->fts->add_wq);
+ ut_ad(lock_trx_has_sys_table_locks(trx) == 0);
- if (fts->add_wq && (!fts_bg_thread_exited)) {
- /* Wait for any background threads accessing the table
- to exit. */
- mutex_enter(&fts->bg_threads_mutex);
- fts->fts_status |= BG_THREAD_STOP;
+ row_mysql_unlock_data_dictionary(trx);
+ fts_optimize_remove_table(table);
+ row_mysql_lock_data_dictionary(trx);
+ }
- dict_table_wait_for_bg_threads_to_exit(table, 250000);
+ /* Do not bother to deal with persistent stats for temp
+ tables since we know temp tables do not use persistent
+ stats. */
+ if (!dict_table_is_temporary(table)) {
+ dict_stats_wait_bg_to_stop_using_tables(
+ table, NULL, trx);
+ }
+ }
- mutex_exit(&fts->bg_threads_mutex);
+ /* make sure background stats thread is not running on the table */
+ ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
- row_mysql_lock_data_dictionary(trx);
- fts_bg_thread_exited = TRUE;
- goto retry;
- } else {
- fts_optimize_remove_table(table);
- row_mysql_lock_data_dictionary(trx);
+ /* Delete the link file if used. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ fil_delete_link_file(name);
+ }
+
+ if (!dict_table_is_temporary(table)) {
+
+ dict_stats_recalc_pool_del(table);
+
+ /* Remove stats for this table and all of its indexes from the
+ persistent storage if it exists and if there are stats for this
+ table in there. This function creates its own trx and commits
+ it. */
+ char errstr[1024];
+ err = dict_stats_drop_table(name, errstr, sizeof(errstr));
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_WARN, "%s", errstr);
}
}
@@ -3540,7 +3832,7 @@ retry:
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, TRUE);
+ dict_table_close(table, TRUE, FALSE);
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
@@ -3552,7 +3844,9 @@ check_next_foreign:
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- if (foreign && trx->check_foreigns
+ if (!srv_read_only_mode
+ && foreign
+ && trx->check_foreigns
&& !(drop_db && dict_tables_have_same_db(
name, foreign->foreign_table_name_lookup))) {
FILE* ef = dict_foreign_err_file;
@@ -3589,16 +3883,16 @@ check_next_foreign:
if (table->n_foreign_key_checks_running > 0) {
- const char* table_name = table->name;
+ const char* save_tablename = table->name;
ibool added;
- added = row_add_table_to_background_drop_list(table_name);
+ added = row_add_table_to_background_drop_list(save_tablename);
if (added) {
ut_print_timestamp(stderr);
fputs(" InnoDB: You are trying to drop table ",
stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
+ ut_print_name(stderr, trx, TRUE, save_tablename);
fputs("\n"
"InnoDB: though there is a"
" foreign key check running on it.\n"
@@ -3663,23 +3957,54 @@ check_next_foreign:
goto funct_exit;
}
+ /* The "to_be_dropped" marks table that is to be dropped, but
+ has not been dropped, instead, was put in the background drop
+ list due to being used by concurrent DML operations. Clear it
+ here since there are no longer any concurrent activities on it,
+ and it is free to be dropped */
+ table->to_be_dropped = false;
+
/* If we get this far then the table to be dropped must not have
any table or record locks on it. */
ut_a(!lock_table_has_locks(table));
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = table->id;
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->table_id = table->id;
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be dropping auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name, "/FTS_") != NULL);
+ }
/* Mark all indexes unavailable in the data dictionary cache
before starting to drop the table. */
- for (index = dict_table_get_first_index(table);
+ unsigned* page_no;
+ unsigned* page_nos;
+ heap = mem_heap_create(
+ 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos);
+ tablename = mem_heap_strdup(heap, name);
+
+ page_no = page_nos = static_cast<unsigned*>(
+ mem_heap_alloc(
+ heap,
+ UT_LIST_GET_LEN(table->indexes) * sizeof *page_no));
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
rw_lock_x_lock(dict_index_get_lock(index));
- ut_ad(!index->to_be_dropped);
- index->to_be_dropped = TRUE;
+ /* Save the page numbers so that we can restore them
+ if the operation fails. */
+ *page_no++ = index->page;
+ /* Mark the index unusable. */
+ index->page = FIL_NULL;
rw_lock_x_unlock(dict_index_get_lock(index));
}
@@ -3698,6 +4023,7 @@ check_next_foreign:
"table_id CHAR;\n"
"index_id CHAR;\n"
"foreign_id CHAR;\n"
+ "space_id INT;\n"
"found INT;\n"
"DECLARE CURSOR cur_fk IS\n"
@@ -3720,6 +4046,12 @@ check_next_foreign:
"IF (SQL % NOTFOUND) THEN\n"
" RETURN;\n"
"END IF;\n"
+ "SELECT SPACE INTO space_id\n"
+ "FROM SYS_TABLES\n"
+ "WHERE NAME = :table_name;\n"
+ "IF (SQL % NOTFOUND) THEN\n"
+ " RETURN;\n"
+ "END IF;\n"
"found := 1;\n"
"SELECT ID INTO sys_foreign_id\n"
"FROM SYS_TABLES\n"
@@ -3762,56 +4094,90 @@ check_next_foreign:
" END IF;\n"
"END LOOP;\n"
"CLOSE cur_idx;\n"
+ "DELETE FROM SYS_TABLESPACES\n"
+ "WHERE SPACE = space_id;\n"
+ "DELETE FROM SYS_DATAFILES\n"
+ "WHERE SPACE = space_id;\n"
"DELETE FROM SYS_COLUMNS\n"
"WHERE TABLE_ID = table_id;\n"
"DELETE FROM SYS_TABLES\n"
- "WHERE ID = table_id;\n"
+ "WHERE NAME = :table_name;\n"
"END;\n"
, FALSE, trx);
switch (err) {
- ibool is_temp;
- mem_heap_t* heap;
+ ibool is_temp;
case DB_SUCCESS:
-
- heap = mem_heap_create(200);
-
/* Clone the name, in case it has been allocated
from table->heap, which will be freed by
dict_table_remove_from_cache(table) below. */
- name = mem_heap_strdup(heap, name);
space_id = table->space;
+ ibd_file_missing = table->ibd_file_missing;
- is_temp = table->flags2 & DICT_TF2_TEMPORARY;
+ is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY);
+
+ /* If there is a temp path then the temp flag is set.
+ However, during recovery, we might have a temp flag but
+ not know the temp path */
ut_a(table->dir_path_of_temp_table == NULL || is_temp);
+ if (dict_table_is_discarded(table)
+ || table->ibd_file_missing) {
+ /* Do not attempt to drop known-to-be-missing
+ tablespaces. */
+ space_id = 0;
+ }
+
+ /* We do not allow temporary tables with a remote path. */
+ ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table->flags)));
+
+ if (space_id && DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else if (table->dir_path_of_temp_table) {
+ filepath = fil_make_ibd_name(
+ table->dir_path_of_temp_table, true);
+ } else {
+ filepath = fil_make_ibd_name(tablename, false);
+ }
if (dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
ut_ad(table->n_ref_count == 0);
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
err = fts_drop_tables(trx, table);
if (err != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr," InnoDB: Error: (%lu) not "
+ fprintf(stderr," InnoDB: Error: (%s) not "
"able to remove ancillary FTS tables "
- "for table ", err);
- ut_print_name(stderr, trx, TRUE, name);
+ "for table ", ut_strerr(err));
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs("\n", stderr);
goto funct_exit;
}
+ }
+ /* The table->fts flag can be set on the table for which
+ the cluster index is being rebuilt. Such table might not have
+ DICT_TF2_FTS flag set. So keep this out of above
+ dict_table_has_fts_index condition */
+ if (table->fts) {
fts_free(table);
}
dict_table_remove_from_cache(table);
- if (dict_load_table(name, TRUE, DICT_ERR_IGNORE_NONE) != NULL) {
+ if (dict_load_table(tablename, TRUE,
+ DICT_ERR_IGNORE_NONE) != NULL) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: not able to remove table ",
stderr);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs(" from the dictionary cache!\n", stderr);
err = DB_ERROR;
}
@@ -3819,23 +4185,46 @@ check_next_foreign:
/* Do not drop possible .ibd tablespace if something went
wrong: we do not want to delete valuable data of the user */
- if (err == DB_SUCCESS && space_id > 0) {
- if (!fil_space_for_table_exists_in_mem(
- space_id, name, FALSE, !is_temp)) {
+ /* Don't spam the log if we can't find the tablespace of
+ a temp table or if the tablespace has been discarded. */
+ print_msg = !(is_temp || ibd_file_missing);
+
+ if (err == DB_SUCCESS && space_id > TRX_SYS_SPACE) {
+ if (!is_temp
+ && !fil_space_for_table_exists_in_mem(
+ space_id, tablename, FALSE,
+ print_msg, false, NULL, 0)) {
+ /* This might happen if we are dropping a
+ discarded tablespace */
err = DB_SUCCESS;
+ if (print_msg) {
+ char msg_tablename[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ msg_tablename, sizeof(tablename),
+ tablename, FALSE);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Removed the table %s from "
+ "InnoDB's data dictionary",
+ msg_tablename);
+ }
+
+ /* Force a delete of any discarded
+ or temporary files. */
+
+ fil_delete_file(filepath);
+
+ } else if (fil_delete_tablespace(
+ space_id,
+ BUF_REMOVE_FLUSH_NO_WRITE)
+ != DB_SUCCESS) {
fprintf(stderr,
"InnoDB: We removed now the InnoDB"
" internal data dictionary entry\n"
"InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
- } else if (!fil_delete_tablespace(space_id)) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fprintf(stderr, ".\n");
ut_print_timestamp(stderr);
@@ -3843,13 +4232,12 @@ check_next_foreign:
" InnoDB: Error: not able to"
" delete tablespace %lu of table ",
(ulong) space_id);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs("!\n", stderr);
err = DB_ERROR;
}
}
- mem_heap_free(heap);
break;
case DB_OUT_OF_FILE_SPACE:
@@ -3874,7 +4262,7 @@ check_next_foreign:
fprintf(stderr, "InnoDB: unknown error code %lu"
" while dropping table:", (ulong) err);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fprintf(stderr, ".\n");
trx->error_state = DB_SUCCESS;
@@ -3884,16 +4272,25 @@ check_next_foreign:
/* Mark all indexes available in the data dictionary
cache again. */
- for (index = dict_table_get_first_index(table);
+ page_no = page_nos;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
+ ut_a(index->page == FIL_NULL);
+ index->page = *page_no++;
rw_lock_x_unlock(dict_index_get_lock(index));
}
}
funct_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ if (filepath) {
+ mem_free(filepath);
+ }
if (locked_dictionary) {
trx_commit_for_mysql(trx);
@@ -3905,7 +4302,7 @@ funct_exit:
srv_wake_master_thread();
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -3929,9 +4326,9 @@ row_mysql_drop_temp_tables(void)
mtr_start(&mtr);
btr_pcur_open_at_index_side(
- TRUE,
+ true,
dict_table_get_first_index(dict_sys->sys_tables),
- BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
for (;;) {
const rec_t* rec;
@@ -3950,6 +4347,8 @@ row_mysql_drop_temp_tables(void)
ROW_FORMAT=REDUNDANT. */
rec = btr_pcur_get_rec(&pcur);
field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
+ field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
if (len != 4
|| !(mach_read_from_4(field) & DICT_N_COLS_COMPACT)) {
@@ -4003,15 +4402,15 @@ row_mysql_drop_temp_tables(void)
Drop all foreign keys in a database, see Bug#18942.
Called at the end of row_drop_database_for_mysql().
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
drop_all_foreign_keys_in_db(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
trx_t* trx) /*!< in: transaction handle */
{
pars_info_t* pinfo;
- ulint err;
+ dberr_t err;
ut_a(name[strlen(name) - 1] == '/');
@@ -4063,22 +4462,24 @@ drop_all_foreign_keys_in_db(
Drops a database for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_database_for_mysql(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
trx_t* trx) /*!< in: transaction handle */
{
- dict_table_t* table;
- char* table_name;
- int err = DB_SUCCESS;
- ulint namelen = strlen(name);
+ dict_table_t* table;
+ char* table_name;
+ dberr_t err = DB_SUCCESS;
+ ulint namelen = strlen(name);
ut_a(name != NULL);
ut_a(name[namelen - 1] == '/');
trx->op_info = "dropping database";
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
trx_start_if_not_started_xa(trx);
loop:
row_mysql_lock_data_dictionary(trx);
@@ -4086,11 +4487,29 @@ loop:
while ((table_name = dict_get_first_table_name_in_db(name))) {
ut_a(memcmp(table_name, name, namelen) == 0);
- table = dict_table_open_on_name_no_stats(table_name, TRUE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
- ut_a(table);
- ut_a(!table->can_be_evicted);
+ if (!table) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot load table %s from InnoDB internal "
+ "data dictionary during drop database",
+ table_name);
+ mem_free(table_name);
+ err = DB_TABLE_NOT_FOUND;
+ break;
+
+ }
+
+ if (row_is_mysql_tmp_table_name(table->name)) {
+ /* There could be an orphan temp table left from
+ interrupted alter table rebuild operation */
+ dict_table_close(table, TRUE, FALSE);
+ } else {
+ ut_a(!table->can_be_evicted || table->ibd_file_missing);
+ }
/* Wait until MySQL does not have any queries running on
the table */
@@ -4121,8 +4540,8 @@ loop:
if (err != DB_SUCCESS) {
fputs("InnoDB: DROP DATABASE ", stderr);
ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %lu for table ",
- (ulint) err);
+ fprintf(stderr, " failed with error (%s) for table ",
+ ut_strerr(err));
ut_print_name(stderr, trx, TRUE, table_name);
putc('\n', stderr);
mem_free(table_name);
@@ -4135,7 +4554,7 @@ loop:
if (err == DB_SUCCESS) {
/* after dropping all tables try to drop all leftover
foreign keys in case orphaned ones exist */
- err = (int) drop_all_foreign_keys_in_db(name, trx);
+ err = drop_all_foreign_keys_in_db(name, trx);
if (err != DB_SUCCESS) {
fputs("InnoDB: DROP DATABASE ", stderr);
@@ -4157,9 +4576,9 @@ loop:
/*********************************************************************//**
Checks if a table name contains the string "/#sql" which denotes temporary
tables in MySQL.
-@return TRUE if temporary table */
-static
-ibool
+@return true if temporary table */
+UNIV_INTERN __attribute__((warn_unused_result))
+bool
row_is_mysql_tmp_table_name(
/*========================*/
const char* name) /*!< in: table name in the form
@@ -4172,8 +4591,8 @@ row_is_mysql_tmp_table_name(
/****************************************************************//**
Delete a single constraint.
@return error code or DB_SUCCESS */
-static
-int
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_delete_constraint_low(
/*======================*/
const char* id, /*!< in: constraint id */
@@ -4183,7 +4602,7 @@ row_delete_constraint_low(
pars_info_add_str_literal(info, "id", id);
- return((int) que_eval_sql(info,
+ return(que_eval_sql(info,
"PROCEDURE DELETE_CONSTRAINT () IS\n"
"BEGIN\n"
"DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
@@ -4195,8 +4614,8 @@ row_delete_constraint_low(
/****************************************************************//**
Delete a single constraint.
@return error code or DB_SUCCESS */
-static
-int
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_delete_constraint(
/*==================*/
const char* id, /*!< in: constraint id */
@@ -4205,7 +4624,7 @@ row_delete_constraint(
mem_heap_t* heap, /*!< in: memory heap */
trx_t* trx) /*!< in: transaction handle */
{
- ulint err;
+ dberr_t err;
/* New format constraints have ids <databasename>/<constraintname>. */
err = row_delete_constraint_low(
@@ -4222,29 +4641,30 @@ row_delete_constraint(
err = row_delete_constraint_low(id, trx);
}
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Renames a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit) /*!< in: if TRUE then commit trx */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: whether to commit trx */
{
dict_table_t* table = NULL;
ibool dict_locked = FALSE;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
mem_heap_t* heap = NULL;
const char** constraints_to_drop = NULL;
ulint n_constraints_to_drop = 0;
ibool old_is_tmp, new_is_tmp;
pars_info_t* info = NULL;
+ int retry;
ut_a(old_name != NULL);
ut_a(new_name != NULL);
@@ -4279,8 +4699,8 @@ row_rename_table_for_mysql(
dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
- table = dict_table_open_on_name_no_stats(old_name, dict_locked,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(old_name, dict_locked, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (!table) {
err = DB_TABLE_NOT_FOUND;
@@ -4299,18 +4719,19 @@ row_rename_table_for_mysql(
"InnoDB: " REFMAN "innodb-troubleshooting.html\n",
stderr);
goto funct_exit;
- } else if (table->ibd_file_missing) {
+
+ } else if (table->ibd_file_missing
+ && !dict_table_is_discarded(table)) {
+
err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not have an .ibd file"
- " in the database directory.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table %s does not have an .ibd file in the database "
+ "directory. See " REFMAN "innodb-troubleshooting.html",
+ old_name);
+
goto funct_exit;
+
} else if (new_is_tmp) {
/* MySQL is doing an ALTER TABLE command and it renames the
original table to a temporary table name. We want to preserve
@@ -4329,27 +4750,75 @@ row_rename_table_for_mysql(
}
}
+ /* Is a foreign key check running on this table? */
+ for (retry = 0; retry < 100
+ && table->n_foreign_key_checks_running > 0; ++retry) {
+ row_mysql_unlock_data_dictionary(trx);
+ os_thread_yield();
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ if (table->n_foreign_key_checks_running > 0) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: in ALTER TABLE ", stderr);
+ ut_print_name(stderr, trx, TRUE, old_name);
+ fprintf(stderr, "\n"
+ "InnoDB: a FOREIGN KEY check is running.\n"
+ "InnoDB: Cannot rename table.\n");
+ err = DB_TABLE_IN_FK_CHECK;
+ goto funct_exit;
+ }
+
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data from system tables. */
info = pars_info_create();
pars_info_add_str_literal(info, "new_table_name", new_name);
-
pars_info_add_str_literal(info, "old_table_name", old_name);
err = que_eval_sql(info,
"PROCEDURE RENAME_TABLE () IS\n"
"BEGIN\n"
- "UPDATE SYS_TABLES SET NAME = :new_table_name\n"
+ "UPDATE SYS_TABLES"
+ " SET NAME = :new_table_name\n"
" WHERE NAME = :old_table_name;\n"
"END;\n"
, FALSE, trx);
- if (err != DB_SUCCESS) {
+ /* SYS_TABLESPACES and SYS_DATAFILES track non-system tablespaces
+ which have space IDs > 0. */
+ if (err == DB_SUCCESS
+ && table->space != TRX_SYS_SPACE
+ && !table->ibd_file_missing) {
+ /* Make a new pathname to update SYS_DATAFILES. */
+ char* new_path = row_make_new_pathname(table, new_name);
+
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "new_table_name", new_name);
+ pars_info_add_str_literal(info, "new_path_name", new_path);
+ pars_info_add_int4_literal(info, "space_id", table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :new_table_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :new_path_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "END;\n"
+ , FALSE, trx);
+
+ mem_free(new_path);
+ }
+ if (err != DB_SUCCESS) {
goto end;
- } else if (!new_is_tmp) {
+ }
+
+ if (!new_is_tmp) {
/* Rename all constraints. */
info = pars_info_create();
@@ -4486,12 +4955,12 @@ end:
/* The following call will also rename the .ibd data file if
the table is stored in a single-table tablespace */
- if (!dict_table_rename_in_cache(table, new_name,
- !new_is_tmp)) {
+ err = dict_table_rename_in_cache(
+ table, new_name, !new_is_tmp);
+ if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
- err = DB_ERROR;
goto funct_exit;
}
@@ -4527,8 +4996,8 @@ end:
stderr);
}
- ut_a(dict_table_rename_in_cache(table,
- old_name, FALSE));
+ ut_a(DB_SUCCESS == dict_table_rename_in_cache(
+ table, old_name, FALSE));
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
@@ -4538,7 +5007,7 @@ end:
funct_exit:
if (table != NULL) {
- dict_table_close(table, dict_locked);
+ dict_table_close(table, dict_locked, FALSE);
}
if (commit) {
@@ -4558,9 +5027,9 @@ funct_exit:
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return TRUE if ok */
+@return true if ok */
UNIV_INTERN
-ibool
+bool
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
@@ -4575,7 +5044,7 @@ row_check_index_for_mysql(
byte* buf;
ulint ret;
rec_t* rec;
- ibool is_ok = TRUE;
+ bool is_ok = true;
int cmp;
ibool contains_null;
ulint i;
@@ -4588,10 +5057,20 @@ row_check_index_for_mysql(
*n_rows = 0;
- /* Full Text index are implemented by auxiliary tables,
- not the B-tree */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_clust(index)) {
+ /* The clustered index of a table is always available.
+ During online ALTER TABLE that rebuilds the table, the
+ clustered index in the old table will have
+ index->online_log pointing to the new table. All
+ indexes of the old table will remain valid and the new
+ table will be unaccessible to MySQL until the
+ completion of the ALTER TABLE. */
+ } else if (dict_index_is_online_ddl(index)
+ || (index->type & DICT_FTS)) {
+ /* Full Text index are implemented by auxiliary tables,
+ not the B-tree. We also skip secondary indexes that are
+ being created online. */
+ return(true);
}
buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));
@@ -4672,7 +5151,7 @@ not_ok:
"InnoDB: record ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- is_ok = FALSE;
+ is_ok = false;
} else if (dict_index_is_unique(index)
&& !contains_null
&& matched_fields
@@ -4702,9 +5181,8 @@ not_ok:
mem_heap_empty(heap);
- prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec,
- index, offsets,
- &n_ext, heap);
+ prev_entry = row_rec_to_index_entry(
+ rec, index, offsets, &n_ext, heap);
if (UNIV_LIKELY_NULL(tmp_heap)) {
mem_heap_free(tmp_heap);
@@ -4718,9 +5196,9 @@ not_ok:
/*********************************************************************//**
Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
+@return true if monitor table */
UNIV_INTERN
-ibool
+bool
row_is_magic_monitor_table(
/*=======================*/
const char* table_name) /*!< in: name of the table, in the
@@ -4751,7 +5229,7 @@ row_mysql_init(void)
{
mutex_create(
row_drop_list_mutex_key,
- &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
+ &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
UT_LIST_INIT(row_mysql_drop_list);
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index ab28b396920..ee603be453a 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,8 +42,10 @@ Created 3/14/1997 Heikki Tuuri
#include "row0upd.h"
#include "row0vers.h"
#include "row0mysql.h"
+#include "row0log.h"
#include "log0log.h"
#include "srv0mon.h"
+#include "srv0start.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -110,119 +112,134 @@ row_purge_reposition_pcur(
return(node->found_clust);
}
+/** Status of row_purge_remove_clust() */
+enum row_purge_status {
+ ROW_PURGE_DONE, /*!< The row has been removed. */
+ ROW_PURGE_FAIL, /*!< The purge was not successful. */
+ ROW_PURGE_SUSPEND/*!< Cannot purge now, due to online rebuild. */
+};
+
/***********************************************************//**
Removes a delete marked clustered index record if possible.
-@return TRUE if success, or if not found, or if modified after the
-delete marking */
-static
-ibool
+@retval ROW_PURGE_DONE if the row was not found, or it was successfully removed
+@retval ROW_PURGE_FAIL if the row was modified after the delete marking
+@retval ROW_PURGE_SUSPEND if the row refers to an off-page column and
+an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+enum row_purge_status
row_purge_remove_clust_if_poss_low(
/*===============================*/
- purge_node_t* node, /*!< in: row purge node */
+ purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
- dict_index_t* index;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- mtr_t mtr;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ dict_index_t* index;
+ enum row_purge_status status = ROW_PURGE_DONE;
+ mtr_t mtr;
+ rec_t* rec;
+ mem_heap_t* heap = NULL;
+ ulint* offsets;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
- index = dict_table_get_first_index(node->table);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
- pcur = &node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
+ index = dict_table_get_first_index(node->table);
log_free_check();
mtr_start(&mtr);
- success = row_purge_reposition_pcur(mode, node, &mtr);
-
- if (!success) {
- /* The record is already removed */
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
+ if (!row_purge_reposition_pcur(mode, node, &mtr)) {
+ /* The record was already removed. */
+ goto func_exit;
}
- rec = btr_pcur_get_rec(pcur);
+ rec = btr_pcur_get_rec(&node->pcur);
- if (node->roll_ptr != row_get_rec_roll_ptr(
- rec, index, rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap))) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- /* Someone else has modified the record later: do not remove */
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+ offsets = rec_get_offsets(
+ rec, index, offsets_, ULINT_UNDEFINED, &heap);
- return(TRUE);
+ if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
+ /* Someone else has modified the record later: do not remove */
+ goto func_exit;
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (dict_index_get_online_status(index) == ONLINE_INDEX_CREATION
+ && rec_offs_any_extern(offsets)) {
+ status = ROW_PURGE_SUSPEND;
+ goto func_exit;
}
if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ status = btr_cur_optimistic_delete(
+ btr_pcur_get_btr_cur(&node->pcur), 0, &mtr)
+ ? ROW_PURGE_DONE : ROW_PURGE_FAIL;
} else {
+ dberr_t err;
ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NONE, &mtr);
+ btr_cur_pessimistic_delete(
+ &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
+ RB_NONE, &mtr);
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ status = ROW_PURGE_FAIL;
+ break;
+ default:
ut_error;
}
}
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+func_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
- return(success);
+ btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+
+ return(status);
}
/***********************************************************//**
Removes a clustered index record if it has not been modified after the delete
-marking. */
-static
-void
+marking.
+@retval true if the row was not found, or it was successfully removed
+@retval false the purge needs to be suspended, either because of
+running out of file space or because the row refers to an off-page
+column and an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_remove_clust_if_poss(
/*===========================*/
- purge_node_t* node) /*!< in: row purge node */
+ purge_node_t* node) /*!< in/out: row purge node */
{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing clustered record\n", stderr); */
-
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
- if (success) {
-
- return;
+ switch (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
+ case ROW_PURGE_DONE:
+ return(true);
+ case ROW_PURGE_SUSPEND:
+ return(false);
+ case ROW_PURGE_FAIL:
+ break;
}
-retry:
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
+ for (ulint n_tries = 0;
+ n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
+ n_tries++) {
+ switch (row_purge_remove_clust_if_poss_low(
+ node, BTR_MODIFY_TREE)) {
+ case ROW_PURGE_DONE:
+ return(true);
+ case ROW_PURGE_SUSPEND:
+ return(false);
+ case ROW_PURGE_FAIL:
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ }
}
- ut_a(success);
+ return(false);
}
/***********************************************************//**
@@ -234,21 +251,21 @@ is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
-this function first returns TRUE and then FALSE, if a user transaction
+this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return TRUE if the secondary index record can be purged */
+@return true if the secondary index record can be purged */
UNIV_INTERN
-ibool
+bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
const dtuple_t* entry) /*!< in: secondary index entry */
{
- ibool can_delete;
+ bool can_delete;
mtr_t mtr;
ut_ad(!dict_index_is_clust(index));
@@ -268,7 +285,7 @@ row_purge_poss_sec(
Removes a secondary index entry if possible, by modifying the
index tree. Does not try to buffer the delete.
@return TRUE if success or if not found */
-static
+static __attribute__((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
/*==============================*/
@@ -279,13 +296,35 @@ row_purge_remove_sec_if_poss_tree(
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success = TRUE;
- ulint err;
+ dberr_t err;
mtr_t mtr;
enum row_search_result search_result;
log_free_check();
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+ if (dict_index_is_online_ddl(index)) {
+ /* Online secondary index creation will not
+ copy any delete-marked records. Therefore
+ there is nothing to be purged. We must also
+ skip the purge when a completed index is
+ dropped by rollback_inplace_alter_table(). */
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
+
search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
&pcur, &mtr);
@@ -327,7 +366,7 @@ row_purge_remove_sec_if_poss_tree(
& rec_get_info_bits(btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NONE, &mtr);
switch (UNIV_EXPECT(err, DB_SUCCESS)) {
case DB_SUCCESS:
@@ -342,6 +381,7 @@ row_purge_remove_sec_if_poss_tree(
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(success);
@@ -350,9 +390,10 @@ func_exit:
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
-@return TRUE if success or if not found */
-static
-ibool
+@retval true if success or if not found
+@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_remove_sec_if_poss_leaf(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
@@ -361,12 +402,40 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
+ ulint mode;
enum row_search_result search_result;
+ bool success = true;
log_free_check();
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ if (dict_index_is_online_ddl(index)) {
+ /* Online secondary index creation will not
+ copy any delete-marked records. Therefore
+ there is nothing to be purged. We must also
+ skip the purge when a completed index is
+ dropped by rollback_inplace_alter_table(). */
+ goto func_exit_no_pcur;
+ }
+
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED | BTR_DELETE;
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ mode = BTR_MODIFY_LEAF | BTR_DELETE;
+ }
+
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
/* Set the query thread, so that ibuf_insert_low() will be
@@ -374,10 +443,9 @@ row_purge_remove_sec_if_poss_leaf(
pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
search_result = row_search_index_entry(
- index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);
+ index, entry, mode, &pcur, &mtr);
switch (search_result) {
- ibool success;
case ROW_FOUND:
/* Before attempting to purge a record, check
if it is safe to do so. */
@@ -390,11 +458,10 @@ row_purge_remove_sec_if_poss_leaf(
btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
- if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {
+ if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
/* The index entry could not be deleted. */
- success = FALSE;
- goto func_exit;
+ success = false;
}
}
/* fall through (the index entry is still needed,
@@ -405,9 +472,8 @@ row_purge_remove_sec_if_poss_leaf(
/* The deletion was buffered. */
case ROW_NOT_FOUND:
/* The index entry does not exist, nothing to do. */
- success = TRUE;
- func_exit:
btr_pcur_close(&pcur);
+ func_exit_no_pcur:
mtr_commit(&mtr);
return(success);
}
@@ -418,19 +484,26 @@ row_purge_remove_sec_if_poss_leaf(
/***********************************************************//**
Removes a secondary index entry if possible. */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull(1,2)))
void
row_purge_remove_sec_if_poss(
/*=========================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
+ const dtuple_t* entry) /*!< in: index entry */
{
ibool success;
ulint n_tries = 0;
/* fputs("Purge: Removing secondary record\n", stderr); */
+ if (!entry) {
+ /* The node->row must have lacked some fields of this
+ index. This is possible when the undo log record was
+ written before this index was created. */
+ return;
+ }
+
if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
return;
@@ -454,18 +527,18 @@ retry:
}
/***********************************************************//**
-Purges a delete marking of a record. */
-static
-void
+Purges a delete marking of a record.
+@retval true if the row was not found, or it was successfully removed
+@retval false the purge needs to be suspended, either because of
+running out of file space or because the row refers to an off-page
+column and an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_del_mark(
/*===============*/
- purge_node_t* node) /*!< in: row purge node */
+ purge_node_t* node) /*!< in/out: row purge node */
{
mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
-
- ut_ad(node);
heap = mem_heap_create(1024);
@@ -477,13 +550,11 @@ row_purge_del_mark(
break;
}
- index = node->index;
-
if (node->index->type != DICT_FTS) {
- /* Build the index entry */
- entry = row_build_index_entry(node->row, NULL, index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
+ dtuple_t* entry = row_build_index_entry_low(
+ node->row, NULL, node->index, heap);
+ row_purge_remove_sec_if_poss(node, node->index, entry);
+ mem_heap_empty(heap);
}
node->index = dict_table_get_next_index(node->index);
@@ -491,14 +562,15 @@ row_purge_del_mark(
mem_heap_free(heap);
- row_purge_remove_clust_if_poss(node);
+ return(row_purge_remove_clust_if_poss(node));
}
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field. */
-static
-void
+marked record if that record contained an externally stored field.
+@return true if purged, false if skipped */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_upd_exist_or_extern_func(
/*===============================*/
#ifdef UNIV_DEBUG
@@ -508,16 +580,24 @@ row_purge_upd_exist_or_extern_func(
trx_undo_rec_t* undo_rec) /*!< in: record to purge */
{
mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ibool is_insert;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- ulint i;
- mtr_t mtr;
- ut_ad(node);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_get_online_status(dict_table_get_first_index(
+ node->table))
+ == ONLINE_INDEX_CREATION) {
+ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
+
+ const upd_field_t* ufield
+ = upd_get_nth_field(node->update, i);
+
+ if (dfield_is_ext(&ufield->new_val)) {
+ return(false);
+ }
+ }
+ }
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|| (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@@ -534,15 +614,13 @@ row_purge_upd_exist_or_extern_func(
break;
}
- index = node->index;
-
if (row_upd_changes_ord_field_binary(node->index, node->update,
thr, NULL, NULL)) {
/* Build the older version of the index entry */
- entry = row_build_index_entry(node->row, NULL,
- index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
+ dtuple_t* entry = row_build_index_entry_low(
+ node->row, NULL, node->index, heap);
+ row_purge_remove_sec_if_poss(node, node->index, entry);
+ mem_heap_empty(heap);
}
node->index = dict_table_get_next_index(node->index);
@@ -552,7 +630,7 @@ row_purge_upd_exist_or_extern_func(
skip_secondaries:
/* Free possible externally stored fields */
- for (i = 0; i < upd_get_n_fields(node->update); i++) {
+ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
const upd_field_t* ufield
= upd_get_nth_field(node->update, i);
@@ -562,6 +640,12 @@ skip_secondaries:
buf_block_t* block;
ulint internal_offset;
byte* data_field;
+ dict_index_t* index;
+ ibool is_insert;
+ ulint rseg_id;
+ ulint page_no;
+ ulint offset;
+ mtr_t mtr;
/* We use the fact that new_val points to
undo_rec and get thus the offset of
@@ -590,9 +674,17 @@ skip_secondaries:
index tree */
index = dict_table_get_first_index(node->table);
-
mtr_x_lock(dict_index_get_lock(index), &mtr);
-
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_ad(0);
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_ABORTED:
+ break;
+ }
+#endif /* UNIV_DEBUG */
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
@@ -622,6 +714,8 @@ skip_secondaries:
mtr_commit(&mtr);
}
}
+
+ return(true);
}
#ifdef UNIV_DEBUG
@@ -634,14 +728,14 @@ skip_secondaries:
/***********************************************************//**
Parses the row reference and other info in a modify undo log record.
-@return TRUE if purge operation required */
+@return true if purge operation required */
static
-ibool
+bool
row_purge_parse_undo_rec(
/*=====================*/
purge_node_t* node, /*!< in: row undo node */
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
- ibool* updated_extern, /*!< out: TRUE if an externally
+ bool* updated_extern, /*!< out: true if an externally
stored field was updated */
que_thr_t* thr) /*!< in: query thread */
{
@@ -665,40 +759,29 @@ row_purge_parse_undo_rec(
if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
- return(FALSE);
+ return(false);
}
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
node->table = NULL;
- if (type == TRX_UNDO_UPD_EXIST_REC
- && node->cmpl_info & UPD_NODE_NO_ORD_CHANGE
- && !(*updated_extern)) {
-
- /* Purge requires no changes to indexes: we may return */
-
- return(FALSE);
- }
-
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
- rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+ rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
- node->table = dict_table_open_on_id(table_id, FALSE);
+ node->table = dict_table_open_on_id(table_id, FALSE, FALSE);
if (node->table == NULL) {
-err_exit:
/* The table has been dropped: no need to do purge */
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
- return(FALSE);
+ goto err_exit;
}
if (node->table->ibd_file_missing) {
/* We skip purge of missing .ibd files */
- dict_table_close(node->table, FALSE);
+ dict_table_close(node->table, FALSE, FALSE);
node->table = NULL;
@@ -708,12 +791,22 @@ err_exit:
clust_index = dict_table_get_first_index(node->table);
if (clust_index == NULL) {
+ /* The table was corrupt in the data dictionary.
+ dict_set_corrupted() works on an index, and
+ we do not have an index to call it with. */
+close_exit:
+ dict_table_close(node->table, FALSE, FALSE);
+err_exit:
+ rw_lock_s_unlock(&dict_operation_lock);
+ return(false);
+ }
- dict_table_close(node->table, FALSE);
-
- /* The table was corrupt in the data dictionary */
+ if (type == TRX_UNDO_UPD_EXIST_REC
+ && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ && !*updated_extern) {
- goto err_exit;
+ /* Purge requires no changes to indexes: we may return */
+ goto close_exit;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
@@ -734,13 +827,14 @@ err_exit:
node->heap);
}
- return(TRUE);
+ return(true);
}
/***********************************************************//**
-Purges the parsed record. */
-static
-void
+Purges the parsed record.
+@return true if purged, false if skipped */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_record_func(
/*==================*/
purge_node_t* node, /*!< in: row purge node */
@@ -748,10 +842,11 @@ row_purge_record_func(
#ifdef UNIV_DEBUG
const que_thr_t*thr, /*!< in: query thread */
#endif /* UNIV_DEBUG */
- ibool updated_extern) /*!< in: TRUE if external columns
+ bool updated_extern) /*!< in: whether external columns
were updated */
{
dict_index_t* clust_index;
+ bool purged = true;
clust_index = dict_table_get_first_index(node->table);
@@ -759,7 +854,10 @@ row_purge_record_func(
switch (node->rec_type) {
case TRX_UNDO_DEL_MARK_REC:
- row_purge_del_mark(node);
+ purged = row_purge_del_mark(node);
+ if (!purged) {
+ break;
+ }
MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
break;
default:
@@ -768,20 +866,25 @@ row_purge_record_func(
}
/* fall through */
case TRX_UNDO_UPD_EXIST_REC:
- row_purge_upd_exist_or_extern(thr, node, undo_rec);
+ purged = row_purge_upd_exist_or_extern(thr, node, undo_rec);
+ if (!purged) {
+ break;
+ }
MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
break;
}
if (node->found_clust) {
btr_pcur_close(&node->pcur);
+ node->found_clust = FALSE;
}
if (node->table != NULL) {
- dict_table_close(node->table, FALSE);
+ dict_table_close(node->table, FALSE, FALSE);
node->table = NULL;
}
+ return(purged);
}
#ifdef UNIV_DEBUG
@@ -804,18 +907,24 @@ row_purge(
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
que_thr_t* thr) /*!< in: query thread */
{
- ut_ad(node);
- ut_ad(thr);
-
if (undo_rec != &trx_purge_dummy_rec) {
- ibool updated_extern;
+ bool updated_extern;
- if (row_purge_parse_undo_rec(
- node, undo_rec, &updated_extern, thr)) {
+ while (row_purge_parse_undo_rec(
+ node, undo_rec, &updated_extern, thr)) {
- row_purge_record(node, undo_rec, thr, updated_extern);
+ bool purged = row_purge_record(
+ node, undo_rec, thr, updated_extern);
+
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ if (purged
+ || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ return;
+ }
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+ /* Retry the purge in a second. */
+ os_thread_sleep(1000000);
}
}
}
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
new file mode 100644
index 00000000000..72e0bf43d77
--- /dev/null
+++ b/storage/innobase/row/row0quiesce.cc
@@ -0,0 +1,702 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0quiesce.cc
+Quiesce a tablespace.
+
+Created 2012-02-08 by Sunny Bains.
+*******************************************************/
+
+#include "row0quiesce.h"
+#include "row0mysql.h"
+
+#ifdef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#include "ibuf0ibuf.h"
+#include "srv0start.h"
+#include "trx0purge.h"
+
+/*********************************************************************//**
+Write the meta data (index user fields) config file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_index_fields(
+/*===========================*/
+ const dict_index_t* index, /*!< in: write the meta data for
+ this index */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ byte row[sizeof(ib_uint32_t) * 2];
+
+ for (ulint i = 0; i < index->n_fields; ++i) {
+ byte* ptr = row;
+ const dict_field_t* field = &index->fields[i];
+
+ mach_write_to_4(ptr, field->prefix_len);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, field->fixed_len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_9",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index fields.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Include the NUL byte in the length. */
+ ib_uint32_t len = strlen(field->name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_10",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(field->name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index column.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the meta data config file index information.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_indexes(
+/*======================*/
+ const dict_table_t* table, /*!< in: write the meta data for
+ this table */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ {
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Write the number of indexes in the table. */
+ mach_write_to_4(row, UT_LIST_GET_LEN(table->indexes));
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_11",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index count.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ dberr_t err = DB_SUCCESS;
+
+ /* Write the index meta data. */
+ for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != 0 && err == DB_SUCCESS;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ byte* ptr;
+ byte row[sizeof(index_id_t)
+ + sizeof(ib_uint32_t) * 8];
+
+ ptr = row;
+
+ ut_ad(sizeof(index_id_t) == 8);
+ mach_write_to_8(ptr, index->id);
+ ptr += sizeof(index_id_t);
+
+ mach_write_to_4(ptr, index->space);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->page);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->type);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->trx_id_offset);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_user_defined_cols);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_uniq);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_nullable);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_fields);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_12",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index meta-data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Write the length of the index name.
+ NUL byte is included in the length. */
+ ib_uint32_t len = strlen(index->name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_1",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(index->name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index name.");
+
+ return(DB_IO_ERROR);
+ }
+
+ err = row_quiesce_write_index_fields(index, file, thd);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Write the meta data (table columns) config file. Serialise the contents of
+dict_col_t structure, along with the column name. All fields are serialized
+as ib_uint32_t.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_table(
+/*====================*/
+ const dict_table_t* table, /*!< in: write the meta data for
+ this table */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ dict_col_t* col;
+ byte row[sizeof(ib_uint32_t) * 7];
+
+ col = table->cols;
+
+ for (ulint i = 0; i < table->n_cols; ++i, ++col) {
+ byte* ptr = row;
+
+ mach_write_to_4(ptr, col->prtype);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->mtype);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->len);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->mbminmaxlen);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->ind);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->ord_part);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->max_prefix);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_2",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing table column data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Write out the column name as [len, byte array]. The len
+ includes the NUL byte. */
+ ib_uint32_t len;
+ const char* col_name;
+
+ col_name = dict_table_get_col_name(table, dict_col_get_no(col));
+
+ /* Include the NUL byte in the length. */
+ len = strlen(col_name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_3",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(col_name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing column name.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the meta data config file header.
+Layout, in write order: [version:4][hostname len:4][hostname bytes]
+[table name len:4][table name bytes][autoinc:8][page size:4]
+[table flags:4][n_cols:4]; all integers serialized via mach_write_to_*().
+Both string lengths include the terminating NUL byte.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_header(
+/*=====================*/
+	const dict_table_t*	table,	/*!< in: write the meta data for
+					this table */
+	FILE*			file,	/*!< in: file to write to */
+	THD*			thd)	/*!< in/out: session */
+{
+	/* Scratch buffer, reused for every 32-bit value written below. */
+	byte			value[sizeof(ib_uint32_t)];
+
+	/* Write the meta-data version number. */
+	mach_write_to_4(value, IB_EXPORT_CFG_VERSION_V1);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_4", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing meta-data version number.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* Write the server hostname. */
+	ib_uint32_t	len;
+	const char*	hostname = server_get_hostname();
+
+	/* Play it safe and check for NULL. */
+	if (hostname == 0) {
+		static const char	NullHostname[] = "Hostname unknown";
+
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Unable to determine server hostname.");
+
+		hostname = NullHostname;
+	}
+
+	/* The server hostname includes the NUL byte. */
+	len = strlen(hostname) + 1;
+	mach_write_to_4(value, len);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_5", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
+	    || fwrite(hostname, 1, len, file) != len) {
+
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing hostname.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* The table name includes the NUL byte. */
+	ut_a(table->name != 0);
+	len = strlen(table->name) + 1;
+
+	/* Write the table name. */
+	mach_write_to_4(value, len);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_6", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
+	    || fwrite(table->name, 1, len, file) != len) {
+
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table name.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* 12-byte scratch buffer: first 8 of its bytes hold the autoinc
+	value, then it is reused for the three 4-byte values below. */
+	byte		row[sizeof(ib_uint32_t) * 3];
+
+	/* Write the next autoinc value. */
+	mach_write_to_8(row, table->autoinc);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_7", close(fileno(file)););
+
+	if (fwrite(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table autoinc value.");
+
+		return(DB_IO_ERROR);
+	}
+
+	byte*	ptr = row;
+
+	/* Write the system page size. */
+	mach_write_to_4(ptr, UNIV_PAGE_SIZE);
+	ptr += sizeof(ib_uint32_t);
+
+	/* Write the table->flags. */
+	mach_write_to_4(ptr, table->flags);
+	ptr += sizeof(ib_uint32_t);
+
+	/* Write the number of columns in the table. */
+	mach_write_to_4(ptr, table->n_cols);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_8", close(fileno(file)););
+
+	if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table meta-data.");
+
+		return(DB_IO_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the table meta data after quiesce: opens the .cfg file and writes
+the header, column and index sections in sequence, stopping at the first
+section that fails.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_cfg(
+/*==================*/
+	dict_table_t*	table,	/*!< in: write the meta data for
+				this table */
+	THD*		thd)	/*!< in/out: session */
+{
+	dberr_t	err;
+	char	name[OS_FILE_MAX_PATH];
+
+	srv_get_meta_data_filename(table, name, sizeof(name));
+
+	ib_logf(IB_LOG_LEVEL_INFO, "Writing table metadata to '%s'", name);
+
+	FILE*	file = fopen(name, "w+b");
+
+	if (file == NULL) {
+		ib_errf(thd, IB_LOG_LEVEL_WARN, ER_CANT_CREATE_FILE,
+			name, errno, strerror(errno));
+
+		err = DB_IO_ERROR;
+	} else {
+		err = row_quiesce_write_header(table, file, thd);
+
+		if (err == DB_SUCCESS) {
+			err = row_quiesce_write_table(table, file, thd);
+		}
+
+		if (err == DB_SUCCESS) {
+			err = row_quiesce_write_indexes(table, file, thd);
+		}
+
+		/* Always flush and close, even after an earlier error,
+		so that no buffered data or open handle is leaked; the
+		warnings below do not override an earlier error code. */
+		if (fflush(file) != 0) {
+
+			char	msg[BUFSIZ];
+
+			ut_snprintf(msg, sizeof(msg), "%s flush() failed",
+				    name);
+
+			ib_senderrf(
+				thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+				errno, strerror(errno), msg);
+		}
+
+		if (fclose(file) != 0) {
+			char	msg[BUFSIZ];
+
+			/* Bug fix: the message used to read "flose()". */
+			ut_snprintf(msg, sizeof(msg), "%s fclose() failed",
+				    name);
+
+			ib_senderrf(
+				thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+				errno, strerror(errno), msg);
+		}
+	}
+
+	return(err);
+}
+
+/*********************************************************************//**
+Check whether a table has an FTS index defined on it.
+@return true if an FTS index exists on the table */
+static
+bool
+row_quiesce_table_has_fts_index(
+/*============================*/
+	const dict_table_t*	table)	/*!< in: quiesce this table */
+{
+	bool	found = false;
+
+	/* The index list must be walked under the dictionary mutex. */
+	dict_mutex_enter_for_mysql();
+
+	const dict_index_t*	index = UT_LIST_GET_FIRST(table->indexes);
+
+	while (index != 0) {
+
+		if (index->type & DICT_FTS) {
+			found = true;
+			break;
+		}
+
+		index = UT_LIST_GET_NEXT(indexes, index);
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	return(found);
+}
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in: stop purge, merge any
+pending change buffer entries for the table, flush its pages to disk and
+write the .cfg meta-data file, then mark the quiesce as complete. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	trx_t*		trx)		/*!< in/out: transaction/session */
+{
+	ut_a(trx->mysql_thd != 0);
+	ut_a(srv_n_purge_threads > 0);
+	ut_ad(!srv_read_only_mode);
+
+	char	table_name[MAX_FULL_NAME_LEN + 1];
+
+	innobase_format_name(
+		table_name, sizeof(table_name), table->name, FALSE);
+
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Sync to disk of '%s' started.", table_name);
+
+	/* Stop purge while the tablespace is being written out. */
+	if (trx_purge_state() != PURGE_STATE_DISABLED) {
+		trx_purge_stop();
+	}
+
+	ut_a(table->id > 0);
+
+	ulint	count = 0;
+
+	/* Merge pending change buffer entries for this table; log
+	progress every 20 iterations so long merges remain visible. */
+	while (ibuf_contract_in_background(table->id, TRUE) != 0) {
+		if (!(++count % 20)) {
+			ib_logf(IB_LOG_LEVEL_INFO,
+				"Merging change buffer entries for '%s'",
+				table_name);
+		}
+	}
+
+	if (!trx_is_interrupted(trx)) {
+		buf_LRU_flush_or_remove_pages(
+			table->space, BUF_REMOVE_FLUSH_WRITE, trx);
+
+		/* Re-check: the flush above can take a long time and
+		the user may have killed the session meanwhile. */
+		if (trx_is_interrupted(trx)) {
+
+			ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+
+		} else if (row_quiesce_write_cfg(table, trx->mysql_thd)
+			   != DB_SUCCESS) {
+
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"There was an error writing to the "
+				"meta data file");
+		} else {
+			ib_logf(IB_LOG_LEVEL_INFO,
+				"Table '%s' flushed to disk", table_name);
+		}
+	} else {
+		ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+	}
+
+	/* Mark the quiesce complete even on abort, so that
+	row_quiesce_table_complete() does not wait forever. */
+	dberr_t	err = row_quiesce_set_state(table, QUIESCE_COMPLETE, trx);
+	ut_a(err == DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Cleanup after table quiesce: wait for the quiesce to finish, remove the
+.cfg meta-data file and resume purge. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+	dict_table_t*	table,	/*!< in: quiesce this table */
+	trx_t*		trx)	/*!< in/out: transaction/session */
+{
+	ulint		count = 0;
+	char		table_name[MAX_FULL_NAME_LEN + 1];
+
+	ut_a(trx->mysql_thd != 0);
+
+	innobase_format_name(
+		table_name, sizeof(table_name), table->name, FALSE);
+
+	/* We need to wait for the operation to complete if the
+	transaction has been killed. */
+
+	while (table->quiesce != QUIESCE_COMPLETE) {
+
+		/* Print a warning after every minute. Note: the first
+		warning is printed immediately (count == 0). */
+		if (!(count % 60)) {
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"Waiting for quiesce of '%s' to complete",
+				table_name);
+		}
+
+		/* Sleep for a second. */
+		os_thread_sleep(1000000);
+
+		++count;
+	}
+
+	/* Remove the .cfg file now that the user has resumed
+	normal operations. Otherwise it will cause problems when
+	the user tries to drop the database (remove directory). */
+	char	cfg_name[OS_FILE_MAX_PATH];
+
+	srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
+
+	os_file_delete_if_exists(cfg_name);
+
+	/* NOTE(review): this is logged after the delete above; the
+	wording suggests an announcement — confirm intended order. */
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Deleting the meta-data file '%s'", cfg_name);
+
+	/* Resume the purge that row_quiesce_table_start() stopped. */
+	if (trx_purge_state() != PURGE_STATE_DISABLED) {
+		trx_purge_run();
+	}
+
+	dberr_t	err = row_quiesce_set_state(table, QUIESCE_NONE, trx);
+	ut_a(err == DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Set a table's quiesce state. Refuses read-only mode and system-tablespace
+tables; warns (but still proceeds) for tables with FTS artifacts. The
+state transition is validated and applied under the data dictionary lock
+with the table's indexes x-locked.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	ib_quiesce_t	state,		/*!< in: quiesce state to set */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_a(srv_n_purge_threads > 0);
+
+	if (srv_read_only_mode) {
+
+		ib_senderrf(trx->mysql_thd,
+			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+		return(DB_UNSUPPORTED);
+
+	} else if (table->space == TRX_SYS_SPACE) {
+
+		/* The system tablespace cannot be quiesced on its own. */
+		char	table_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			table_name, sizeof(table_name), table->name, FALSE);
+
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
+
+		return(DB_UNSUPPORTED);
+	} else if (row_quiesce_table_has_fts_index(table)) {
+
+		/* Warning only: the quiesce still proceeds below. */
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_NOT_SUPPORTED_YET,
+			    "FLUSH TABLES on tables that have an FTS index. "
+			    "FTS auxiliary tables will not be flushed.");
+
+	} else if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+		/* If this flag is set then the table may not have any active
+		FTS indexes but it will still have the auxiliary tables. */
+
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_NOT_SUPPORTED_YET,
+			    "FLUSH TABLES on a table that had an FTS index, "
+			    "created on a hidden column, the "
+			    "auxiliary tables haven't been dropped as yet. "
+			    "FTS auxiliary tables will not be flushed.");
+	}
+
+	/* Lock order matters: dictionary lock first, then index locks. */
+	row_mysql_lock_data_dictionary(trx);
+
+	dict_table_x_lock_indexes(table);
+
+	/* Enforce the legal state machine:
+	NONE -> START -> COMPLETE -> NONE. */
+	switch (state) {
+	case QUIESCE_START:
+		ut_a(table->quiesce == QUIESCE_NONE);
+		break;
+
+	case QUIESCE_COMPLETE:
+		ut_a(table->quiesce == QUIESCE_START);
+		break;
+
+	case QUIESCE_NONE:
+		ut_a(table->quiesce == QUIESCE_COMPLETE);
+		break;
+	}
+
+	table->quiesce = state;
+
+	dict_table_x_unlock_indexes(table);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	return(DB_SUCCESS);
+}
+
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 8c703b1e06c..be786f954fb 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,28 +50,26 @@ Created 4/20/1996 Heikki Tuuri
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are
-unavailable and ext != NULL */
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
UNIV_INTERN
dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the index entry is allocated */
+row_build_index_entry_low(
+/*======================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
{
dtuple_t* entry;
ulint entry_len;
ulint i;
- ut_ad(row && index && heap);
- ut_ad(dtuple_check_typed(row));
-
entry_len = dict_index_get_n_fields(index);
entry = dtuple_create(heap, entry_len);
@@ -96,8 +94,19 @@ row_build_index_entry(
= dtuple_get_nth_field(entry, i);
const dfield_t* dfield2
= dtuple_get_nth_field(row, col_no);
- ulint len
- = dfield_get_len(dfield2);
+ ulint len;
+
+#if DATA_MISSING != 0
+# error "DATA_MISSING != 0"
+#endif
+ if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype
+ == DATA_MISSING)) {
+ /* The field has not been initialized in the row.
+ This should be from trx_undo_rec_get_partial_row(). */
+ return(NULL);
+ }
+
+ len = dfield_get_len(dfield2);
dfield_copy(dfield, dfield2);
@@ -171,8 +180,6 @@ row_build_index_entry(
}
}
- ut_ad(dtuple_check_typed(entry));
-
return(entry);
}
@@ -211,21 +218,23 @@ row_build(
of an index, or NULL if
index->table should be
consulted instead */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
{
+ const byte* copy;
dtuple_t* row;
- const dict_table_t* table;
- ulint n_fields;
ulint n_ext_cols;
ulint* ext_cols = NULL; /* remove warning */
ulint len;
- ulint row_len;
byte* buf;
- ulint i;
ulint j;
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
@@ -234,6 +243,7 @@ row_build(
ut_ad(index && rec && heap);
ut_ad(dict_index_is_clust(index));
ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!col_map || col_table);
if (!offsets) {
offsets = rec_get_offsets(rec, index, offsets_,
@@ -260,55 +270,84 @@ row_build(
buf = static_cast<byte*>(
mem_heap_alloc(heap, rec_offs_size(offsets)));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, (ulint*) offsets);
+ copy = rec_copy(buf, rec, offsets);
+ } else {
+ copy = rec;
}
- table = index->table;
- row_len = dict_table_get_n_cols(table);
-
- row = dtuple_create(heap, row_len);
-
- dict_table_copy_types(row, table);
-
- dtuple_set_info_bits(row, rec_get_info_bits(
- rec, dict_table_is_comp(table)));
-
- n_fields = rec_offs_n_fields(offsets);
n_ext_cols = rec_offs_n_extern(offsets);
if (n_ext_cols) {
ext_cols = static_cast<ulint*>(
mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols));
}
- for (i = j = 0; i < n_fields; i++) {
- dict_field_t* ind_field
+ /* Avoid a debug assertion in rec_offs_validate(). */
+ rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets));
+
+ if (!col_table) {
+ ut_ad(!col_map);
+ ut_ad(!add_cols);
+ col_table = index->table;
+ }
+
+ if (add_cols) {
+ ut_ad(col_map);
+ row = dtuple_copy(add_cols, heap);
+ /* dict_table_copy_types() would set the fields to NULL */
+ for (ulint i = 0; i < dict_table_get_n_cols(col_table); i++) {
+ dict_col_copy_type(
+ dict_table_get_nth_col(col_table, i),
+ dfield_get_type(dtuple_get_nth_field(row, i)));
+ }
+ } else {
+ row = dtuple_create(heap, dict_table_get_n_cols(col_table));
+ dict_table_copy_types(row, col_table);
+ }
+
+ dtuple_set_info_bits(row, rec_get_info_bits(
+ copy, rec_offs_comp(offsets)));
+
+ j = 0;
+
+ for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
+ const dict_field_t* ind_field
= dict_index_get_nth_field(index, i);
+
+ if (ind_field->prefix_len) {
+ /* Column prefixes can only occur in key
+ fields, which cannot be stored externally. For
+ a column prefix, there should also be the full
+ field in the clustered index tuple. The row
+ tuple comprises full fields, not prefixes. */
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ continue;
+ }
+
const dict_col_t* col
= dict_field_get_col(ind_field);
ulint col_no
= dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(row, col_no);
-
- if (ind_field->prefix_len == 0) {
- const byte* field = rec_get_nth_field(
- rec, offsets, i, &len);
+ if (col_map) {
+ col_no = col_map[col_no];
- dfield_set_data(dfield, field, len);
+ if (col_no == ULINT_UNDEFINED) {
+ /* dropped column */
+ continue;
+ }
}
+ dfield_t* dfield = dtuple_get_nth_field(row, col_no);
+
+ const byte* field = rec_get_nth_field(
+ copy, offsets, i, &len);
+
+ dfield_set_data(dfield, field, len);
+
if (rec_offs_nth_extern(offsets, i)) {
dfield_set_ext(dfield);
- if (UNIV_LIKELY_NULL(col_table)) {
- ut_a(col_no
- < dict_table_get_n_cols(col_table));
- col = dict_table_get_nth_col(
- col_table, col_no);
- }
+ col = dict_table_get_nth_col(col_table, col_no);
if (col->ord_part) {
/* We will have to fetch prefixes of
@@ -319,14 +358,20 @@ row_build(
}
}
+ rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+
ut_ad(dtuple_check_typed(row));
if (!ext) {
/* REDUNDANT and COMPACT formats store a local
768-byte prefix of each externally stored
- column. No cache is needed. */
- ut_ad(dict_table_get_format(index->table)
- < UNIV_FORMAT_B);
+ column. No cache is needed.
+
+ During online table rebuild,
+ row_log_table_apply_delete_low()
+ may use a cache that was set up by
+ row_log_table_delete(). */
+
} else if (j) {
*ext = row_ext_create(j, ext_cols, index->table->flags, row,
heap);
@@ -402,28 +447,14 @@ row_rec_to_index_entry_low(
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
+@return own: index entry built */
UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
+ const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec) */
ulint* n_ext, /*!< out: number of externally
stored columns */
mem_heap_t* heap) /*!< in: memory heap from which
@@ -431,25 +462,21 @@ row_rec_to_index_entry(
{
dtuple_t* entry;
byte* buf;
+ const rec_t* copy_rec;
ut_ad(rec && heap && index);
ut_ad(rec_offs_validate(rec, index, offsets));
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
+ /* Take a copy of rec to heap */
+ buf = static_cast<byte*>(
+ mem_heap_alloc(heap, rec_offs_size(offsets)));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- } else {
- ut_a(!rec_offs_any_null_extern(rec, offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- }
+ copy_rec = rec_copy(buf, rec, offsets);
- entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
+ rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets));
+ entry = row_rec_to_index_entry_low(
+ copy_rec, index, offsets, n_ext, heap);
+ rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets)));
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 96884e89511..bfda669d97a 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -57,7 +57,6 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "buf0lru.h"
#include "ha_prototypes.h"
-#include "srv0mon.h"
#include "my_compare.h" /* enum icp_result */
@@ -673,8 +672,8 @@ sel_enqueue_prefetched_row(
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_build_prev_vers(
/*====================*/
read_view_t* read_view, /*!< in: read view */
@@ -691,7 +690,7 @@ row_sel_build_prev_vers(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
if (*old_vers_heap) {
mem_heap_empty(*old_vers_heap);
@@ -707,10 +706,9 @@ row_sel_build_prev_vers(
/*********************************************************************//**
Builds the last committed version of a clustered index record for a
-semi-consistent read.
-@return DB_SUCCESS or error code */
-static
-ulint
+semi-consistent read. */
+static __attribute__((nonnull))
+void
row_sel_build_committed_vers_for_mysql(
/*===================================*/
dict_index_t* clust_index, /*!< in: clustered index */
@@ -726,18 +724,16 @@ row_sel_build_committed_vers_for_mysql(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
-
if (prebuilt->old_vers_heap) {
mem_heap_empty(prebuilt->old_vers_heap);
} else {
- prebuilt->old_vers_heap = mem_heap_create(200);
+ prebuilt->old_vers_heap = mem_heap_create(
+ rec_offs_size(*offsets));
}
- err = row_vers_build_for_semi_consistent_read(
+ row_vers_build_for_semi_consistent_read(
rec, mtr, clust_index, offsets, offset_heap,
prebuilt->old_vers_heap, old_vers);
- return(err);
}
/*********************************************************************//**
@@ -809,8 +805,8 @@ row_sel_test_other_conds(
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_get_clust_rec(
/*==================*/
sel_node_t* node, /*!< in: select_node */
@@ -828,7 +824,7 @@ row_sel_get_clust_rec(
dict_index_t* index;
rec_t* clust_rec;
rec_t* old_vers;
- ulint err;
+ dberr_t err;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -982,7 +978,7 @@ err_exit:
Sets a lock on a record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
UNIV_INLINE
-enum db_err
+dberr_t
sel_set_rec_lock(
/*=============*/
const buf_block_t* block, /*!< in: buffer block of rec */
@@ -995,7 +991,7 @@ sel_set_rec_lock(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- enum db_err err;
+ dberr_t err;
trx = thr_get_trx(thr);
@@ -1084,7 +1080,7 @@ row_sel_open_pcur(
(FALSE: no init) */
btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
- &(plan->pcur), FALSE, mtr);
+ &(plan->pcur), false, 0, mtr);
}
ut_ad(plan->n_rows_prefetched == 0);
@@ -1313,8 +1309,8 @@ func_exit:
/*********************************************************************//**
Performs a select step.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel(
/*====*/
sel_node_t* node, /*!< in: select node */
@@ -1347,7 +1343,7 @@ row_sel(
&mtr must be committed before we move
to the next non-clustered record */
ulint found_flag;
- ulint err;
+ dberr_t err;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -2083,11 +2079,9 @@ row_sel_step(
table_node = static_cast<sym_node_t*>(
que_node_get_next(table_node))) {
- enum db_err err;
-
- err = static_cast<enum db_err>(lock_table(
+ dberr_t err = lock_table(
0, table_node->table, i_lock_mode,
- thr));
+ thr);
if (err != DB_SUCCESS) {
trx_t* trx;
@@ -2120,7 +2114,7 @@ row_sel_step(
}
}
- enum db_err err = static_cast<enum db_err>(row_sel(node, thr));
+ dberr_t err = row_sel(node, thr);
/* NOTE! if queries are parallelized, the following assignment may
have problems; the assignment should be made only if thr is the
@@ -2305,42 +2299,6 @@ row_printf_step(
return(thr);
}
-/********************************************************************
-Creates a key in Innobase dtuple format.*/
-
-void
-row_create_key(
-/*===========*/
- dtuple_t* tuple, /* in: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- dict_index_t* index, /* in: index of the key value */
- doc_id_t* doc_id) /* in: doc id to search. */
-{
- dtype_t type;
- dict_field_t* field;
- doc_id_t temp_doc_id;
- dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
-
- ut_a(dict_index_get_n_unique(index) == 1);
-
- /* Permit us to access any field in the tuple (ULINT_MAX): */
- dtuple_set_n_fields(tuple, ULINT_MAX);
-
- field = dict_index_get_nth_field(index, 0);
- dict_col_copy_type(field->col, &type);
- ut_a(dtype_get_mtype(&type) == DATA_INT);
-
- /* Convert to storage byte order */
- mach_write_to_8((byte*) &temp_doc_id, *doc_id);
- *doc_id = temp_doc_id;
-
- ut_a(sizeof(*doc_id) == field->fixed_len);
- dfield_set_data(dfield, doc_id, field->fixed_len);
-
- dtuple_set_n_fields(tuple, 1);
-}
/****************************************************************//**
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
@@ -2536,6 +2494,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield_set_len(dfield, len
- (ulint) (key_ptr - key_end));
}
+ ut_ad(0);
}
n_fields++;
@@ -3008,8 +2967,8 @@ row_sel_store_mysql_rec(
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_build_prev_vers_for_mysql(
/*==============================*/
read_view_t* read_view, /*!< in: read view */
@@ -3026,7 +2985,7 @@ row_sel_build_prev_vers_for_mysql(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
if (prebuilt->old_vers_heap) {
mem_heap_empty(prebuilt->old_vers_heap);
@@ -3045,8 +3004,8 @@ Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
interface.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static
-enum db_err
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_get_clust_rec_for_mysql(
/*============================*/
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
@@ -3073,7 +3032,7 @@ row_sel_get_clust_rec_for_mysql(
dict_index_t* clust_index;
const rec_t* clust_rec;
rec_t* old_vers;
- enum db_err err;
+ dberr_t err;
trx_t* trx;
*out_rec = NULL;
@@ -3172,17 +3131,13 @@ row_sel_get_clust_rec_for_mysql(
clust_rec, clust_index, *offsets,
trx->read_view)) {
- ulint db_err;
-
/* The following call returns 'offsets' associated with
'old_vers' */
- db_err = row_sel_build_prev_vers_for_mysql(
+ err = row_sel_build_prev_vers_for_mysql(
trx->read_view, clust_index, prebuilt,
clust_rec, offsets, offset_heap, &old_vers,
mtr);
- err = static_cast<enum db_err>(db_err);
-
if (err != DB_SUCCESS || old_vers == NULL) {
goto err_exit;
@@ -3226,7 +3181,10 @@ row_sel_get_clust_rec_for_mysql(
func_exit:
*out_rec = clust_rec;
- if (prebuilt->select_lock_type != LOCK_NONE) {
+ /* Store the current position if select_lock_type is not
+ LOCK_NONE or if we are scanning using InnoDB APIs */
+ if (prebuilt->select_lock_type != LOCK_NONE
+ || prebuilt->innodb_api) {
/* We may use the cursor in update or in unlock_row():
store its position */
@@ -3633,7 +3591,7 @@ row_search_idx_cond_check(
return(result);
case ICP_ERROR:
case ICP_ABORTED_BY_USER:
- return(result);
+ return(result);
}
ut_error;
@@ -3649,7 +3607,7 @@ position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
UNIV_INTERN
-ulint
+dberr_t
row_search_for_mysql(
/*=================*/
byte* buf, /*!< in/out: buffer for the fetched
@@ -3678,9 +3636,9 @@ row_search_for_mysql(
dict_index_t* clust_index;
que_thr_t* thr;
const rec_t* rec;
- const rec_t* result_rec;
+ const rec_t* result_rec = NULL;
const rec_t* clust_rec;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ibool unique_search = FALSE;
ibool mtr_has_extra_clust_latch = FALSE;
ibool moves_up = FALSE;
@@ -3701,48 +3659,41 @@ row_search_for_mysql(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ibool table_lock_waited = FALSE;
+ byte* next_buf = 0;
rec_offs_init(offsets_);
ut_ad(index && pcur && search_tuple);
- if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you used"
- " DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ /* We don't support FTS queries from the HANDLER interfaces, because
+ we implemented FTS as reversed inverted index with auxiliary tables.
+ So anything related to traditional index query would not apply to
+ it. */
+ if (index->type & DICT_FTS) {
+ return(DB_END_OF_INDEX);
+ }
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+ ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
- return(DB_ERROR);
- }
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else if (!prebuilt->index_usable) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
return(DB_MISSING_HISTORY);
- }
- if (dict_index_is_corrupted(index)) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ } else if (dict_index_is_corrupted(index)) {
+
return(DB_CORRUPTION);
- }
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+ } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
@@ -3846,7 +3797,6 @@ row_search_for_mysql(
prebuilt->n_rows_fetched++;
- srv_n_rows_read++;
err = DB_SUCCESS;
goto func_exit;
}
@@ -3925,7 +3875,8 @@ row_search_for_mysql(
&& dict_index_is_clust(index)
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
+ && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
+ && !prebuilt->innodb_api) {
mode = PAGE_CUR_GE;
@@ -3973,8 +3924,8 @@ row_search_for_mysql(
rec, offsets)) {
case ICP_NO_MATCH:
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
goto shortcut_mismatch;
case ICP_MATCH:
goto shortcut_match;
@@ -4005,8 +3956,6 @@ row_search_for_mysql(
/* ut_print_name(stderr, index->name);
fputs(" shortcut\n", stderr); */
- srv_n_rows_read++;
-
err = DB_SUCCESS;
goto release_search_latch_if_needed;
@@ -4179,12 +4128,12 @@ wait_table_again:
/* Try to place a gap lock on the next index record
to prevent phantoms in ORDER BY ... DESC queries */
- const rec_t* next = page_rec_get_next_const(rec);
+ const rec_t* next_rec = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(next, index, offsets,
+ offsets = rec_get_offsets(next_rec, index, offsets,
ULINT_UNDEFINED, &heap);
err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- next, index, offsets,
+ next_rec, index, offsets,
prebuilt->select_lock_type,
LOCK_GAP, thr);
@@ -4197,16 +4146,10 @@ wait_table_again:
goto lock_wait_or_error;
}
}
- } else {
- if (mode == PAGE_CUR_G) {
- btr_pcur_open_at_index_side(
- TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- } else if (mode == PAGE_CUR_L) {
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- }
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) {
+ btr_pcur_open_at_index_side(
+ mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF,
+ pcur, false, 0, &mtr);
}
rec_loop:
@@ -4348,6 +4291,9 @@ wrong_offs:
/* Calculate the 'offsets' associated with 'rec' */
+ ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id);
+
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
@@ -4539,15 +4485,10 @@ no_gap_lock:
/* The following call returns 'offsets'
associated with 'old_vers' */
- err = row_sel_build_committed_vers_for_mysql(
+ row_sel_build_committed_vers_for_mysql(
clust_index, prebuilt, rec,
&offsets, &heap, &old_vers, &mtr);
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
/* Check whether it was a deadlock or not, if not
a deadlock and the transaction had to wait then
release the lock it is waiting on. */
@@ -4649,8 +4590,8 @@ no_gap_lock:
case ICP_NO_MATCH:
goto next_rec;
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
err = DB_RECORD_NOT_FOUND;
goto idx_cond_failed;
case ICP_MATCH:
@@ -4690,12 +4631,15 @@ locks_ok:
delete marked record and the record following it.
For now this is applicable only to clustered indexes while
- doing a unique search. There is scope for further optimization
+ doing a unique search except for HANDLER queries because
+ HANDLER allows NEXT and PREV even in unique search on
+ clustered index. There is scope for further optimization
applicable to unique secondary indexes. Current behaviour is
to widen the scope of a lock on an already delete marked record
if the same record is deleted twice by the same transaction */
if (index == clust_index && unique_search
- && !prebuilt->used_in_HANDLER) {
+ && !prebuilt->used_in_HANDLER) {
+
err = DB_RECORD_NOT_FOUND;
goto normal_return;
@@ -4712,8 +4656,8 @@ locks_ok:
}
goto next_rec;
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
err = DB_RECORD_NOT_FOUND;
goto idx_cond_failed;
case ICP_MATCH:
@@ -4831,9 +4775,10 @@ requires_clust_rec:
&& !prebuilt->templ_contains_blob
&& !prebuilt->clust_index_was_generated
&& !prebuilt->used_in_HANDLER
+ && !prebuilt->innodb_api
&& prebuilt->template_type
!= ROW_MYSQL_DUMMY_TEMPLATE
- && !prebuilt->result) {
+ && !prebuilt->in_fts_query) {
/* Inside an update, for example, we do not cache rows,
since we may use the cursor position to do the actual
@@ -4849,29 +4794,58 @@ requires_clust_rec:
/* We only convert from InnoDB row format to MySQL row
format when ICP is disabled. */
- if (!prebuilt->idx_cond
- && !row_sel_store_mysql_rec(
- row_sel_fetch_last_buf(prebuilt),
- prebuilt, result_rec,
- result_rec != rec,
- result_rec != rec ? clust_index : index,
- offsets)) {
-
- /* Only fresh inserts may contain incomplete
- externally stored columns. Pretend that such
- records do not exist. Such records may only be
- accessed at the READ UNCOMMITTED isolation
- level or when rolling back a recovered
- transaction. Rollback happens at a lower
- level, not here. */
- goto next_rec;
- }
+ if (!prebuilt->idx_cond) {
- row_sel_enqueue_cache_row_for_mysql(buf, prebuilt);
+ /* We use next_buf to track the allocation of buffers
+ where we store and enqueue the buffers for our
+ pre-fetch optimisation.
+
+ If next_buf == 0 then we store the converted record
+ directly into the MySQL record buffer (buf). If it is
+ != 0 then we allocate a pre-fetch buffer and store the
+ converted record there.
+
+ If the conversion fails and the MySQL record buffer
+ was not written to then we reset next_buf so that
+ we can re-use the MySQL record buffer in the next
+ iteration. */
+
+ next_buf = next_buf
+ ? row_sel_fetch_last_buf(prebuilt) : buf;
+
+ if (!row_sel_store_mysql_rec(
+ next_buf, prebuilt, result_rec,
+ result_rec != rec,
+ result_rec != rec ? clust_index : index,
+ offsets)) {
+
+ if (next_buf == buf) {
+ ut_a(prebuilt->n_fetch_cached == 0);
+ next_buf = 0;
+ }
+
+ /* Only fresh inserts may contain incomplete
+ externally stored columns. Pretend that such
+ records do not exist. Such records may only be
+ accessed at the READ UNCOMMITTED isolation
+ level or when rolling back a recovered
+ transaction. Rollback happens at a lower
+ level, not here. */
+ goto next_rec;
+ }
+
+ if (next_buf != buf) {
+ row_sel_enqueue_cache_row_for_mysql(
+ next_buf, prebuilt);
+ }
+ } else {
+ row_sel_enqueue_cache_row_for_mysql(buf, prebuilt);
+ }
if (prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) {
goto next_rec;
}
+
} else {
if (UNIV_UNLIKELY
(prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
@@ -4892,7 +4866,7 @@ requires_clust_rec:
rec_offs_size(offsets));
mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->idx_cond) {
+ } else if (!prebuilt->idx_cond && !prebuilt->innodb_api) {
/* The record was not yet converted to MySQL format. */
if (!row_sel_store_mysql_rec(
buf, prebuilt, result_rec,
@@ -4935,11 +4909,16 @@ idx_cond_failed:
|| !dict_index_is_clust(index)
|| direction != 0
|| prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER) {
+ || prebuilt->used_in_HANDLER
+ || prebuilt->innodb_api) {
/* Inside an update always store the cursor position */
btr_pcur_store_position(pcur, &mtr);
+
+ if (prebuilt->innodb_api) {
+ prebuilt->innodb_api_rec = result_rec;
+ }
}
goto normal_return;
@@ -5032,7 +5011,7 @@ lock_table_wait:
mtr_commit(&mtr);
mtr_has_extra_clust_latch = FALSE;
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
/* The following is a patch for MySQL */
@@ -5101,8 +5080,23 @@ normal_return:
mtr_commit(&mtr);
- if (prebuilt->n_fetch_cached > 0) {
- row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
+ if (prebuilt->idx_cond != 0) {
+
+ /* When ICP is active we don't write to the MySQL buffer
+ directly, only to buffers that are enqueued in the pre-fetch
+ queue. We need to dequeue the first buffer and copy the contents
+ to the record buffer that was passed in by MySQL. */
+
+ if (prebuilt->n_fetch_cached > 0) {
+ row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
+ err = DB_SUCCESS;
+ }
+
+ } else if (next_buf != 0) {
+
+ /* We may or may not have enqueued some buffers to the
+ pre-fetch queue, but we definitely wrote to the record
+ buffer passed to us by MySQL. */
err = DB_SUCCESS;
}
@@ -5112,9 +5106,6 @@ normal_return:
dict_index_name_print(stderr, index);
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
#endif /* UNIV_SEARCH_DEBUG */
- if (err == DB_SUCCESS) {
- srv_n_rows_read++;
- }
func_exit:
trx->op_info = "";
@@ -5139,6 +5130,9 @@ func_exit:
#ifdef UNIV_SYNC_DEBUG
ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
+
+ DEBUG_SYNC_C("innodb_row_search_for_mysql_exit");
+
return(err);
}
@@ -5157,7 +5151,22 @@ row_search_check_if_query_cache_permitted(
dict_table_t* table;
ibool ret = FALSE;
- table = dict_table_open_on_name(norm_name, FALSE);
+ /* Disable query cache altogether for all tables if recovered XA
+ transactions in prepared state exist. This is because we do not
+ restore the table locks for those transactions and we may wrongly
+ set ret=TRUE below if "lock_table_get_n_locks(table) == 0". See
+ "Bug#14658648 XA ROLLBACK (DISTRIBUTED DATABASE) NOT WORKING WITH
+ QUERY CACHE ENABLED".
+ Read trx_sys->n_prepared_recovered_trx without mutex protection,
+ not possible to end up with a torn read since n_prepared_recovered_trx
+ is word size. */
+ if (trx_sys->n_prepared_recovered_trx > 0) {
+
+ return(FALSE);
+ }
+
+ table = dict_table_open_on_name(norm_name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (table == NULL) {
@@ -5191,7 +5200,7 @@ row_search_check_if_query_cache_permitted(
}
}
- dict_table_close(table, FALSE);
+ dict_table_close(table, FALSE, FALSE);
return(ret);
}
@@ -5229,8 +5238,6 @@ row_search_autoinc_read_column(
data = rec_get_nth_field(rec, offsets, col_no, &len);
- ut_a(len != UNIV_SQL_NULL);
-
switch (mtype) {
case DATA_INT:
ut_a(len <= sizeof value);
@@ -5289,7 +5296,7 @@ Read the max AUTOINC value from an index.
@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
column name can't be found in index */
UNIV_INTERN
-ulint
+dberr_t
row_search_max_autoinc(
/*===================*/
dict_index_t* index, /*!< in: index to search */
@@ -5299,7 +5306,7 @@ row_search_max_autoinc(
ulint i;
ulint n_cols;
dict_field_t* dfield = NULL;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
n_cols = dict_index_get_n_ordering_defined_by_user(index);
@@ -5321,10 +5328,9 @@ row_search_max_autoinc(
mtr_start(&mtr);
- /* Open at the high/right end (FALSE), and INIT
- cursor (TRUE) */
+ /* Open at the high/right end (false), and init cursor */
btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
const rec_t* rec;
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 78fd4ad5199..25b2b6b62ce 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 2/25/1997 Heikki Tuuri
#include "mach0data.h"
#include "row0undo.h"
#include "row0vers.h"
+#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -60,25 +61,64 @@ introduced where a call to log_free_check() is bypassed. */
Removes a clustered index record. The pcur in node was positioned on the
record, now it is detached.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_clust_rec(
/*==========================*/
undo_node_t* node) /*!< in: undo node */
{
btr_cur_t* btr_cur;
ibool success;
- ulint err;
- ulint n_tries = 0;
+ dberr_t err;
+ ulint n_tries = 0;
mtr_t mtr;
+ dict_index_t* index = node->pcur.btr_cur.index;
+ bool online;
+
+ ut_ad(dict_index_is_clust(index));
mtr_start(&mtr);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
- &mtr);
+ /* This is similar to row_undo_mod_clust(). Even though we
+ call row_log_table_rollback() elsewhere, the DDL thread may
+ already have copied this row to the sort buffers or to the new
+ table. We must log the removal, so that the row will be
+ correctly purged. However, we can log the removal out of sync
+ with the B-tree modification. */
+
+ online = dict_index_is_online_ddl(index);
+ if (online) {
+ ut_ad(node->trx->dict_operation_lock_mode
+ != RW_X_LATCH);
+ ut_ad(node->table->id != DICT_INDEXES_ID);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ success = btr_pcur_restore_position(
+ online
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF, &node->pcur, &mtr);
ut_a(success);
+ btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
+ == node->trx->id);
+
+ if (online && dict_index_is_online_ddl(index)) {
+ const rec_t* rec = btr_cur_get_rec(btr_cur);
+ mem_heap_t* heap = NULL;
+ const ulint* offsets = rec_get_offsets(
+ rec, index, NULL, ULINT_UNDEFINED, &heap);
+ row_log_table_delete(
+ rec, index, offsets,
+ trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ + rec));
+ mem_heap_free(heap);
+ }
+
if (node->table->id == DICT_INDEXES_ID) {
+ ut_ad(!online);
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
/* Drop the index tree associated with the row in
@@ -90,14 +130,12 @@ row_undo_ins_remove_clust_rec(
mtr_start(&mtr);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &(node->pcur), &mtr);
+ success = btr_pcur_restore_position(
+ BTR_MODIFY_LEAF, &node->pcur, &mtr);
ut_a(success);
}
- btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
-
- if (btr_cur_optimistic_delete(btr_cur, &mtr)) {
+ if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
err = DB_SUCCESS;
goto func_exit;
}
@@ -111,7 +149,7 @@ retry:
&(node->pcur), &mtr);
ut_a(success);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
trx_is_recv(node->trx)
? RB_RECOVERY
: RB_NORMAL, &mtr);
@@ -142,8 +180,8 @@ func_exit:
/***************************************************************//**
Removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec_low(
/*========================*/
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
@@ -154,22 +192,31 @@ row_undo_ins_remove_sec_low(
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err = DB_SUCCESS;
mtr_t mtr;
enum row_search_result search_result;
+ log_free_check();
+
mtr_start(&mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ if (row_log_online_op_try(index, entry, 0)) {
+ goto func_exit_no_pcur;
+ }
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
switch (search_result) {
case ROW_NOT_FOUND:
- err = DB_SUCCESS;
goto func_exit;
case ROW_FOUND:
break;
@@ -181,23 +228,24 @@ row_undo_ins_remove_sec_low(
ut_error;
}
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, &mtr)
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (mode != BTR_MODIFY_TREE) {
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
? DB_SUCCESS : DB_FAIL;
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
/* No need to distinguish RB_RECOVERY here, because we
are deleting a secondary index record: the distinction
between RB_NORMAL and RB_RECOVERY only matters when
deleting a record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NORMAL, &mtr);
}
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -207,14 +255,14 @@ func_exit:
Removes a secondary index entry from the index if found. Tries first
optimistic, then pessimistic descent down the tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec(
/*====================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry) /*!< in: index entry to insert */
{
- ulint err;
+ dberr_t err;
ulint n_tries = 0;
/* Try first optimistic descent to the B-tree */
@@ -261,7 +309,7 @@ row_undo_ins_parse_undo_rec(
table_id_t table_id;
ulint type;
ulint dummy;
- ibool dummy_extern;
+ bool dummy_extern;
ut_ad(node);
@@ -271,12 +319,13 @@ row_undo_ins_parse_undo_rec(
node->rec_type = type;
node->update = NULL;
- node->table = dict_table_open_on_id(table_id, dict_locked);
+ node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
/* Skip the UNDO if we can't find the table or the .ibd file. */
if (UNIV_UNLIKELY(node->table == NULL)) {
} else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
- dict_table_close(node->table, dict_locked);
+close_table:
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
} else {
clust_index = dict_table_get_first_index(node->table);
@@ -286,10 +335,7 @@ row_undo_ins_parse_undo_rec(
ptr, clust_index, &node->ref, node->heap);
if (!row_undo_search_clust_to_pcur(node)) {
-
- dict_table_close(node->table, dict_locked);
-
- node->table = NULL;
+ goto close_table;
}
} else {
@@ -299,10 +345,7 @@ row_undo_ins_parse_undo_rec(
node->table->name);
fprintf(stderr, " has no indexes, "
"ignoring the table\n");
-
- dict_table_close(node->table, dict_locked);
-
- node->table = NULL;
+ goto close_table;
}
}
}
@@ -310,27 +353,32 @@ row_undo_ins_parse_undo_rec(
/***************************************************************//**
Removes secondary index records.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec_rec(
/*========================*/
undo_node_t* node) /*!< in/out: row undo node */
{
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
+ dict_index_t* index = node->index;
mem_heap_t* heap;
heap = mem_heap_create(1024);
- while (node->index != NULL) {
+ while (index != NULL) {
dtuple_t* entry;
- if (node->index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
+ if (index->type & DICT_FTS) {
+ dict_table_next_uncorrupted_index(index);
continue;
}
- entry = row_build_index_entry(node->row, node->ext,
- node->index, heap);
+ /* An insert undo record TRX_UNDO_INSERT_REC will
+ always contain all fields of the index. It does not
+ matter if any indexes were created afterwards; all
+ index entries can be reconstructed from the row. */
+ entry = row_build_index_entry(
+ node->row, node->ext, index, heap);
if (UNIV_UNLIKELY(!entry)) {
/* The database must have crashed after
inserting a clustered index record but before
@@ -343,9 +391,7 @@ row_undo_ins_remove_sec_rec(
transactions. */
ut_a(trx_is_recv(node->trx));
} else {
- log_free_check();
-
- err = row_undo_ins_remove_sec(node->index, entry);
+ err = row_undo_ins_remove_sec(index, entry);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
@@ -353,10 +399,11 @@ row_undo_ins_remove_sec_rec(
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
+ dict_table_next_uncorrupted_index(index);
}
func_exit:
+ node->index = index;
mem_heap_free(heap);
return(err);
}
@@ -369,15 +416,14 @@ if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-ulint
+dberr_t
row_undo_ins(
/*=========*/
undo_node_t* node) /*!< in: row undo node */
{
- ulint err;
- ibool dict_locked;
+ dberr_t err;
+ ibool dict_locked;
- ut_ad(node);
ut_ad(node->state == UNDO_NODE_INSERT);
dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
@@ -392,24 +438,46 @@ row_undo_ins(
/* Iterate over all the indexes and undo the insert.*/
+ node->index = dict_table_get_first_index(node->table);
+ ut_ad(dict_index_is_clust(node->index));
+
+ if (dict_index_is_online_ddl(node->index)) {
+ /* Note that we are rolling back this transaction, so
+ that all inserts and updates with this DB_TRX_ID can
+ be skipped. */
+ row_log_table_rollback(node->index, node->trx->id);
+ }
+
/* Skip the clustered index (the first index) */
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
+ node->index = dict_table_get_next_index(node->index);
dict_table_skip_corrupt_index(node->index);
err = row_undo_ins_remove_sec_rec(node);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- goto func_exit;
- }
+ if (err == DB_SUCCESS) {
- log_free_check();
+ log_free_check();
- err = row_undo_ins_remove_clust_rec(node);
+ if (node->table->id == DICT_INDEXES_ID) {
-func_exit:
- dict_table_close(node->table, dict_locked);
+ if (!dict_locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
+ }
+
+ // FIXME: We need to update the dict_index_t::space and
+ // page number fields too.
+ err = row_undo_ins_remove_clust_rec(node);
+
+ if (node->table->id == DICT_INDEXES_ID
+ && !dict_locked) {
+
+ mutex_exit(&dict_sys->mutex);
+ }
+ }
+
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 4869909f5a6..c1a4ba76052 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -37,6 +37,7 @@ Created 2/27/1997 Heikki Tuuri
#include "mach0data.h"
#include "row0undo.h"
#include "row0vers.h"
+#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -71,11 +72,20 @@ introduced where a call to log_free_check() is bypassed. */
/***********************************************************//**
Undoes a modify in a clustered index record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_clust_low(
/*===================*/
undo_node_t* node, /*!< in: row undo node */
+ ulint** offsets,/*!< out: rec_get_offsets() on the record */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ const dtuple_t**rebuilt_old_pk,
+ /*!< out: row_log_table_get_pk()
+ before the update, or NULL if
+ the table is not being rebuilt online or
+ the PRIMARY KEY definition does not change */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in: mtr; must be committed before
latching any further pages */
@@ -83,12 +93,12 @@ row_undo_mod_clust_low(
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
#ifdef UNIV_DEBUG
ibool success;
#endif /* UNIV_DEBUG */
- pcur = &(node->pcur);
+ pcur = &node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
#ifdef UNIV_DEBUG
@@ -97,31 +107,40 @@ row_undo_mod_clust_low(
btr_pcur_restore_position(mode, pcur, mtr);
ut_ad(success);
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
+ btr_cur_get_index(btr_cur))
+ == thr_get_trx(thr)->id);
+
+ if (mode != BTR_MODIFY_LEAF
+ && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
+ *rebuilt_old_pk = row_log_table_get_pk(
+ btr_cur_get_rec(btr_cur),
+ btr_cur_get_index(btr_cur), NULL, &heap);
+ } else {
+ *rebuilt_old_pk = NULL;
+ }
- if (mode == BTR_MODIFY_LEAF) {
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_optimistic_update(
+ BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
+ | BTR_KEEP_SYS_FLAG,
+ btr_cur, offsets, offsets_heap,
+ node->update, node->cmpl_info,
+ thr, thr_get_trx(thr)->id, mtr);
} else {
- mem_heap_t* heap = NULL;
big_rec_t* dummy_big_rec;
- ut_ad(mode == BTR_MODIFY_TREE);
-
err = btr_cur_pessimistic_update(
BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
- btr_cur, &heap, &dummy_big_rec, node->update,
- node->cmpl_info, thr, mtr);
+ btr_cur, offsets, offsets_heap, heap,
+ &dummy_big_rec, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
ut_a(!dummy_big_rec);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
}
return(err);
@@ -134,8 +153,8 @@ delete-marked record and there no longer exist transactions
that would see the delete-marked record. In other words, we
roll back the insert by purging the record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
@@ -144,7 +163,7 @@ row_undo_mod_remove_clust_low(
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
@@ -159,8 +178,14 @@ row_undo_mod_remove_clust_low(
btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+ /* We are about to remove an old, delete-marked version of the
+ record that may have been delete-marked by a different transaction
+ than the rolling-back one. */
+ ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
+ dict_table_is_comp(node->table)));
+
if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, mtr)
+ err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
? DB_SUCCESS
: DB_FAIL;
} else {
@@ -169,7 +194,7 @@ row_undo_mod_remove_clust_low(
/* This operation is analogous to purge, we can free also
inherited externally stored fields */
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
thr_is_recv(thr)
? RB_RECOVERY_PURGE_REC
: RB_NONE, mtr);
@@ -186,8 +211,8 @@ row_undo_mod_remove_clust_low(
Undoes a modify in a clustered index record. Sets also the node state for the
next round of undo.
@return DB_SUCCESS or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_clust(
/*===============*/
undo_node_t* node, /*!< in: row undo node */
@@ -195,21 +220,42 @@ row_undo_mod_clust(
{
btr_pcur_t* pcur;
mtr_t mtr;
- ulint err;
+ dberr_t err;
+ dict_index_t* index;
+ bool online;
- ut_ad(node && thr);
+ ut_ad(thr_get_trx(thr) == node->trx);
+ ut_ad(node->trx->dict_operation_lock_mode);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
+ || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
log_free_check();
+ pcur = &node->pcur;
+ index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
+ mtr_start(&mtr);
- pcur = &(node->pcur);
+ online = dict_index_is_online_ddl(index);
+ if (online) {
+ ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
- mtr_start(&mtr);
+ mem_heap_t* heap = mem_heap_create(1024);
+ mem_heap_t* offsets_heap = NULL;
+ ulint* offsets = NULL;
+ const dtuple_t* rebuilt_old_pk;
/* Try optimistic processing of the record, keeping changes within
the index page */
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
+ err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
+ heap, &rebuilt_old_pk,
+ thr, &mtr, online
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -219,7 +265,40 @@ row_undo_mod_clust(
mtr_start(&mtr);
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
+ err = row_undo_mod_clust_low(
+ node, &offsets, &offsets_heap, heap, &rebuilt_old_pk,
+ thr, &mtr, BTR_MODIFY_TREE);
+ ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
+ }
+
+ /* Online rebuild cannot be initiated while we are holding
+ dict_operation_lock and index->lock. (It can be aborted.) */
+ ut_ad(online || !dict_index_is_online_ddl(index));
+
+ if (err == DB_SUCCESS && online) {
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ switch (node->rec_type) {
+ case TRX_UNDO_DEL_MARK_REC:
+ row_log_table_insert(
+ btr_pcur_get_rec(pcur), index, offsets);
+ break;
+ case TRX_UNDO_UPD_EXIST_REC:
+ row_log_table_update(
+ btr_pcur_get_rec(pcur), index, offsets,
+ rebuilt_old_pk);
+ break;
+ case TRX_UNDO_UPD_DEL_REC:
+ row_log_table_delete(
+ btr_pcur_get_rec(pcur), index, offsets,
+ node->trx->id);
+ break;
+ default:
+ ut_ad(0);
+ break;
+ }
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -228,8 +307,11 @@ row_undo_mod_clust(
mtr_start(&mtr);
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_LEAF);
+ /* It is not necessary to call row_log_table,
+ because the record is delete-marked and would thus
+ be omitted from the rebuilt copy of the table. */
+ err = row_undo_mod_remove_clust_low(
+ node, thr, &mtr, BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -240,6 +322,9 @@ row_undo_mod_clust(
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
BTR_MODIFY_TREE);
+
+ ut_ad(err == DB_SUCCESS
+ || err == DB_OUT_OF_FILE_SPACE);
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -249,14 +334,18 @@ row_undo_mod_clust(
trx_undo_rec_release(node->trx, node->undo_no);
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+ mem_heap_free(heap);
return(err);
}
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_or_remove_sec_low(
/*====================================*/
undo_node_t* node, /*!< in: row undo node */
@@ -270,7 +359,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur_t* btr_cur;
ibool success;
ibool old_has;
- ulint err;
+ dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
enum row_search_result search_result;
@@ -278,9 +367,30 @@ row_undo_mod_del_mark_or_remove_sec_low(
log_free_check();
mtr_start(&mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(index, entry, 0)) {
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
@@ -296,8 +406,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
In normal processing, if an update ends in a deadlock
before it has inserted all updated secondary index
records, then the undo will not find those records. */
-
- err = DB_SUCCESS;
goto func_exit;
case ROW_FOUND:
break;
@@ -329,16 +437,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
} else {
/* Remove the index record */
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ if (mode != BTR_MODIFY_TREE) {
+ success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
if (success) {
err = DB_SUCCESS;
} else {
err = DB_FAIL;
}
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
/* No need to distinguish RB_RECOVERY_PURGE here,
because we are deleting a secondary index record:
the distinction between RB_NORMAL and
@@ -346,7 +452,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NORMAL, &mtr);
/* The delete operation may fail if we have little
@@ -359,6 +465,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -373,8 +480,8 @@ not cause problems because in row0sel.cc, in queries we always retrieve the
clustered index record or an earlier version of it, if the secondary index
record through which we do the search is delete-marked.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_or_remove_sec(
/*================================*/
undo_node_t* node, /*!< in: row undo node */
@@ -382,7 +489,7 @@ row_undo_mod_del_mark_or_remove_sec(
dict_index_t* index, /*!< in: index */
dtuple_t* entry) /*!< in: index entry */
{
- ulint err;
+ dberr_t err;
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
entry, BTR_MODIFY_LEAF);
@@ -401,42 +508,67 @@ Delete unmarks a secondary index entry which must be found. It might not be
delete-marked at the moment, but it does not harm to unmark it anyway. We also
need to update the fields of the secondary index record if we updated its
fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-@return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+@retval DB_SUCCESS on success
+@retval DB_FAIL if BTR_MODIFY_TREE should be tried
+@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
+@retval DB_DUPLICATE_KEY if the value was missing
+ and an insert would lead to a duplicate exists */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(
/*========================================*/
ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: index */
- const dtuple_t* entry) /*!< in: index entry */
+ dtuple_t* entry) /*!< in: index entry */
{
- mem_heap_t* heap;
btr_pcur_t pcur;
- btr_cur_t* btr_cur;
+ btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
upd_t* update;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
big_rec_t* dummy_big_rec;
mtr_t mtr;
trx_t* trx = thr_get_trx(thr);
+ const ulint flags
+ = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
enum row_search_result search_result;
- /* Ignore indexes that are being created. */
- if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
-
- return(DB_SUCCESS);
- }
+ ut_ad(trx->id);
log_free_check();
mtr_start(&mtr);
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(index, entry, trx->id)) {
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
switch (search_result) {
+ mem_heap_t* heap;
+ mem_heap_t* offsets_heap;
+ ulint* offsets;
case ROW_BUFFERED:
case ROW_NOT_DELETED_REF:
/* These are invalid outcomes, because the mode passed
@@ -444,80 +576,183 @@ row_undo_mod_del_unmark_sec_and_undo_update(
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- ut_ad(0);
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ /* During online secondary index creation, it
+ is possible that MySQL is waiting for a
+ meta-data lock upgrade before invoking
+ ha_innobase::commit_inplace_alter_table()
+ while this ROLLBACK is executing. InnoDB has
+ finished building the index, but it does not
+ yet exist in MySQL. In this case, we suppress
+ the printout to the error log. */
+ fputs("InnoDB: error in sec index entry del undo in\n"
+ "InnoDB: ", stderr);
+ dict_index_name_print(stderr, trx, index);
+ fputs("\n"
+ "InnoDB: tuple ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\n"
+ "InnoDB: record ", stderr);
+ rec_print(stderr, btr_pcur_get_rec(&pcur), index);
+ putc('\n', stderr);
+ trx_print(stderr, trx, 0);
+ fputs("\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n", stderr);
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "record in index %s was not found"
+ " on rollback, trying to insert",
+ index->name);
+ }
+
+ if (btr_cur->up_match >= dict_index_get_n_unique(index)
+ || btr_cur->low_match >= dict_index_get_n_unique(index)) {
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "record in index %s was not found on"
+ " rollback, and a duplicate exists",
+ index->name);
+ }
+ err = DB_DUPLICATE_KEY;
+ break;
+ }
+
+ /* Insert the missing record that we were trying to
+ delete-unmark. */
+ big_rec_t* big_rec;
+ rec_t* insert_rec;
+ offsets = NULL;
+ offsets_heap = NULL;
+
+ err = btr_cur_optimistic_insert(
+ flags, btr_cur, &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ 0, thr, &mtr);
+ ut_ad(!big_rec);
+
+ if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
+ err = btr_cur_pessimistic_insert(
+ flags, btr_cur,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ 0, thr, &mtr);
+ /* There are no off-page columns in
+ secondary indexes. */
+ ut_ad(!big_rec);
+ }
+
+ if (err == DB_SUCCESS) {
+ page_update_max_trx_id(
+ btr_cur_get_block(btr_cur),
+ btr_cur_get_page_zip(btr_cur),
+ trx->id, &mtr);
+ }
+
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
break;
case ROW_FOUND:
- btr_cur = btr_pcur_get_btr_cur(&pcur);
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, FALSE, thr, &mtr);
+ err = btr_cur_del_mark_set_sec_rec(
+ BTR_NO_LOCKING_FLAG,
+ btr_cur, FALSE, thr, &mtr);
ut_a(err == DB_SUCCESS);
- heap = mem_heap_create(100);
-
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + dtuple_get_n_fields(entry) * sizeof(upd_field_t));
+ offsets_heap = NULL;
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(btr_cur),
+ index, NULL, ULINT_UNDEFINED, &offsets_heap);
update = row_upd_build_sec_rec_difference_binary(
- index, entry, btr_cur_get_rec(btr_cur), trx, heap);
+ btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
if (upd_get_n_fields(update) == 0) {
/* Do nothing */
- } else if (mode == BTR_MODIFY_LEAF) {
+ } else if (mode != BTR_MODIFY_TREE) {
/* Try an optimistic updating of the record, keeping
changes within the page */
+ /* TODO: pass offsets, not &offsets */
err = btr_cur_optimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, update, 0, thr, &mtr);
+ flags, btr_cur, &offsets, &offsets_heap,
+ update, 0, thr, thr_get_trx(thr)->id, &mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
- ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, &heap, &dummy_big_rec,
- update, 0, thr, &mtr);
+ flags, btr_cur, &offsets, &offsets_heap,
+ heap, &dummy_big_rec,
+ update, 0, thr, thr_get_trx(thr)->id, &mtr);
ut_a(!dummy_big_rec);
}
mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
}
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
}
/***********************************************************//**
+Flags a secondary index corrupted. */
+static __attribute__((nonnull))
+void
+row_undo_mod_sec_flag_corrupted(
+/*============================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_index_t* index) /*!< in: secondary index */
+{
+ ut_ad(!dict_index_is_clust(index));
+
+ switch (trx->dict_operation_lock_mode) {
+ case RW_S_LATCH:
+ /* Because row_undo() is holding an S-latch
+ on the data dictionary during normal rollback,
+ we can only mark the index corrupted in the
+ data dictionary cache. TODO: fix this somehow.*/
+ mutex_enter(&dict_sys->mutex);
+ dict_set_corrupted_index_cache_only(index, index->table);
+ mutex_exit(&dict_sys->mutex);
+ break;
+ default:
+ ut_ad(0);
+ /* fall through */
+ case RW_X_LATCH:
+ /* This should be the rollback of a data dictionary
+ transaction. */
+ dict_set_corrupted(index, trx, "rollback");
+ }
+}
+
+/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_upd_del_sec(
/*=====================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
+ ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
@@ -530,6 +765,13 @@ row_undo_mod_upd_del_sec(
continue;
}
+ /* During online index creation,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
+ guarantee that any active transaction has not modified
+ indexed columns such that col->ord_part was 0 at the
+ time when the undo log record was written. When we get
+ to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
+ it should always cover all affected indexes. */
entry = row_build_index_entry(
node->row, node->ext, index, heap);
@@ -566,15 +808,17 @@ row_undo_mod_upd_del_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is DEL_MARK.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_sec(
/*======================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
@@ -587,6 +831,13 @@ row_undo_mod_del_mark_sec(
continue;
}
+ /* During online index creation,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
+ guarantee that any active transaction has not modified
+ indexed columns such that col->ord_part was 0 at the
+ time when the undo log record was written. When we get
+ to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
+ it should always cover all affected indexes. */
entry = row_build_index_entry(
node->row, node->ext, index, heap);
@@ -599,8 +850,17 @@ row_undo_mod_del_mark_sec(
BTR_MODIFY_TREE, thr, index, entry);
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
+ if (err == DB_DUPLICATE_KEY) {
+ row_undo_mod_sec_flag_corrupted(
+ thr_get_trx(thr), index);
+ err = DB_SUCCESS;
+ /* Do not return any error to the caller. The
+ duplicate will be reported by ALTER TABLE or
+ CREATE UNIQUE INDEX. Unfortunately we cannot
+ report the duplicate key value to the DDL
+ thread, because the altered_table object is
+ private to its call stack. */
+ } else if (err != DB_SUCCESS) {
break;
}
@@ -616,18 +876,18 @@ row_undo_mod_del_mark_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_upd_exist_sec(
/*=======================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
if (node->index == NULL
- || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
/* No change in secondary indexes */
return(err);
@@ -713,7 +973,11 @@ row_undo_mod_upd_exist_sec(
BTR_MODIFY_TREE, thr, index, entry);
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ if (err == DB_DUPLICATE_KEY) {
+ row_undo_mod_sec_flag_corrupted(
+ thr_get_trx(thr), index);
+ err = DB_SUCCESS;
+ } else if (err != DB_SUCCESS) {
break;
}
@@ -728,12 +992,11 @@ row_undo_mod_upd_exist_sec(
/***********************************************************//**
Parses the row reference and other info in a modify undo log record. */
-static
+static __attribute__((nonnull))
void
row_undo_mod_parse_undo_rec(
/*========================*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
{
dict_index_t* clust_index;
@@ -745,16 +1008,13 @@ row_undo_mod_parse_undo_rec(
ulint info_bits;
ulint type;
ulint cmpl_info;
- ibool dummy_extern;
- trx_t* trx;
+ bool dummy_extern;
- ut_ad(node && thr);
- trx = thr_get_trx(thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;
- node->table = dict_table_open_on_id(table_id, dict_locked);
+ node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
/* TODO: other fixes associated with DROP TABLE + rollback in the
same table by another user */
@@ -765,7 +1025,7 @@ row_undo_mod_parse_undo_rec(
}
if (node->table->ibd_file_missing) {
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
/* We skip undo operations to missing .ibd files */
node->table = NULL;
@@ -782,14 +1042,14 @@ row_undo_mod_parse_undo_rec(
node->heap);
trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
+ roll_ptr, info_bits, node->trx,
node->heap, &(node->update));
node->new_trx_id = trx_id;
node->cmpl_info = cmpl_info;
if (!row_undo_search_clust_to_pcur(node)) {
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
}
@@ -799,21 +1059,23 @@ row_undo_mod_parse_undo_rec(
Undoes a modify operation on a row of a table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
- ibool dict_locked;
+ dberr_t err;
+ ibool dict_locked;
ut_ad(node && thr);
ut_ad(node->state == UNDO_NODE_MODIFY);
dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
- row_undo_mod_parse_undo_rec(node, thr, dict_locked);
+ ut_ad(thr_get_trx(thr) == node->trx);
+
+ row_undo_mod_parse_undo_rec(node, dict_locked);
if (node->table == NULL) {
/* It is already undone, or will be undone by another query
@@ -825,8 +1087,18 @@ row_undo_mod(
return(DB_SUCCESS);
}
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
+ node->index = dict_table_get_first_index(node->table);
+ ut_ad(dict_index_is_clust(node->index));
+
+ if (dict_index_is_online_ddl(node->index)) {
+ /* Note that we are rolling back this transaction, so
+ that all inserts and updates with this DB_TRX_ID can
+ be skipped. */
+ row_log_table_rollback(node->index, node->trx->id);
+ }
+
+ /* Skip the clustered index (the first index) */
+ node->index = dict_table_get_next_index(node->index);
/* Skip all corrupted secondary index */
dict_table_skip_corrupt_index(node->index);
@@ -851,7 +1123,7 @@ row_undo_mod(
err = row_undo_mod_clust(node, thr);
}
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 757d3544ba4..9977a1e8f04 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -216,8 +216,9 @@ row_undo_search_clust_to_pcur(
}
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, NULL, ext, node->heap);
- if (node->update) {
+ offsets, NULL,
+ NULL, NULL, ext, node->heap);
+ if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
clust_index, node->update, node->heap);
@@ -244,14 +245,14 @@ Fetches an undo log record and does the undo for the recorded operation.
If none left, or a partial rollback completed, returns control to the
parent node, which is always a query thread node.
@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo(
/*=====*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
trx_t* trx;
roll_ptr_t roll_ptr;
ibool locked_data_dict;
@@ -332,7 +333,7 @@ row_undo_step(
/*==========*/
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
undo_node_t* node;
trx_t* trx;
@@ -348,17 +349,17 @@ row_undo_step(
err = row_undo(node, thr);
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
/* SQL error detected */
- fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
- (ulong) err);
+ fprintf(stderr, "InnoDB: Fatal error (%s) in rollback.\n",
+ ut_strerr(err));
if (err == DB_OUT_OF_FILE_SPACE) {
fprintf(stderr,
- "InnoDB: Error 13 means out of tablespace.\n"
+ "InnoDB: Out of tablespace.\n"
"InnoDB: Consider increasing"
" your tablespace.\n");
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index 28faa59add8..f97c0c3c82b 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -23,14 +23,13 @@ Update of a row
Created 12/27/1996 Heikki Tuuri
*******************************************************/
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
#include "row0upd.h"
#ifdef UNIV_NONINL
#include "row0upd.ic"
#endif
+#include "ha_prototypes.h"
#include "dict0dict.h"
#include "trx0undo.h"
#include "rem0rec.h"
@@ -43,8 +42,9 @@ Created 12/27/1996 Heikki Tuuri
#include "que0que.h"
#include "row0ext.h"
#include "row0ins.h"
-#include "row0sel.h"
+#include "row0log.h"
#include "row0row.h"
+#include "row0sel.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
@@ -178,8 +178,8 @@ NOTE that this function will temporarily commit mtr and lose the
pcur position!
@return DB_SUCCESS or an error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_check_references_constraints(
/*=================================*/
upd_node_t* node, /*!< in: row update node */
@@ -197,7 +197,7 @@ row_upd_check_references_constraints(
trx_t* trx;
const rec_t* rec;
ulint n_ext;
- ulint err;
+ dberr_t err;
ibool got_s_lock = FALSE;
if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) {
@@ -212,11 +212,12 @@ row_upd_check_references_constraints(
heap = mem_heap_create(500);
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
+ entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);
mtr_commit(mtr);
+ DEBUG_SYNC_C("foreign_constraint_check_for_update");
+
mtr_start(mtr);
if (trx->dict_operation_lock_mode == 0) {
@@ -225,6 +226,7 @@ row_upd_check_references_constraints(
row_mysql_freeze_data_dictionary(trx);
}
+run_again:
foreign = UT_LIST_GET_FIRST(table->referenced_list);
while (foreign) {
@@ -238,18 +240,20 @@ row_upd_check_references_constraints(
|| row_upd_changes_first_fields_binary(
entry, index, node->update,
foreign->n_fields))) {
+ dict_table_t* foreign_table = foreign->foreign_table;
dict_table_t* ref_table = NULL;
- if (foreign->foreign_table == NULL) {
+ if (foreign_table == NULL) {
ref_table = dict_table_open_on_name(
- foreign->foreign_table_name_lookup, FALSE);
+ foreign->foreign_table_name_lookup,
+ FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
- if (foreign->foreign_table) {
+ if (foreign_table) {
os_inc_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -261,18 +265,20 @@ row_upd_check_references_constraints(
err = row_ins_check_foreign_constraint(
FALSE, foreign, table, entry, thr);
- if (foreign->foreign_table) {
+ if (foreign_table) {
os_dec_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE);
+ dict_table_close(ref_table, FALSE, FALSE);
}
- if (err != DB_SUCCESS) {
-
+ /* Some table foreign key dropped, try again */
+ if (err == DB_DICT_CHANGED) {
+ goto run_again;
+ } else if (err != DB_SUCCESS) {
goto func_exit;
}
}
@@ -289,6 +295,8 @@ func_exit:
mem_heap_free(heap);
+ DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
+
return(err);
}
@@ -465,6 +473,47 @@ row_upd_changes_field_size_or_external(
return(FALSE);
}
+
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+ const upd_t* update) /*!< in: update vector */
+{
+ const upd_field_t* upd_field;
+ const dfield_t* new_val;
+ ulint new_len;
+ ulint n_fields;
+ ulint i;
+
+ n_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_fields; i++) {
+ const byte* field_ref;
+
+ upd_field = upd_get_nth_field(update, i);
+ new_val = &(upd_field->new_val);
+ new_len = dfield_get_len(new_val);
+
+ if (!dfield_is_ext(new_val)) {
+ continue;
+ }
+
+ ut_ad(new_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ field_ref = static_cast<const byte*>(dfield_get_data(new_val))
+ + new_len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
@@ -560,7 +609,7 @@ byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
@@ -576,7 +625,7 @@ row_upd_write_sys_vals_to_log(
trx_write_roll_ptr(log_ptr, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
- log_ptr += mach_ull_write_compressed(log_ptr, trx->id);
+ log_ptr += mach_ull_write_compressed(log_ptr, trx_id);
return(log_ptr);
}
@@ -779,10 +828,10 @@ UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
+ const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
upd_field_t* upd_field;
@@ -792,18 +841,16 @@ row_upd_build_sec_rec_difference_binary(
upd_t* update;
ulint n_diff;
ulint i;
- ulint offsets_[REC_OFFS_SMALL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
/* This function is used only for a secondary index */
ut_a(!dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry));
+ ut_ad(!rec_offs_any_extern(offsets));
update = upd_create(dtuple_get_n_fields(entry), heap);
n_diff = 0;
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
@@ -828,7 +875,7 @@ row_upd_build_sec_rec_difference_binary(
dfield_copy(&(upd_field->new_val), dfield);
- upd_field_set_field_no(upd_field, i, index, trx);
+ upd_field_set_field_no(upd_field, i, index, NULL);
n_diff++;
}
@@ -846,12 +893,15 @@ the equal ordering fields. NOTE: we compare the fields as binary strings!
@return own: update vector of differing fields, excluding roll ptr and
trx id */
UNIV_INTERN
-upd_t*
+const upd_t*
row_upd_build_difference_binary(
/*============================*/
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* entry, /*!< in: entry to insert */
const rec_t* rec, /*!< in: clustered index record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+ bool no_sys, /*!< in: skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR */
trx_t* trx, /*!< in: transaction */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
@@ -861,11 +911,9 @@ row_upd_build_difference_binary(
ulint len;
upd_t* update;
ulint n_diff;
- ulint roll_ptr_pos;
ulint trx_id_pos;
ulint i;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
rec_offs_init(offsets_);
/* This function is used only for a clustered index */
@@ -875,11 +923,16 @@ row_upd_build_difference_binary(
n_diff = 0;
- roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ ut_ad(dict_index_get_sys_col_pos(index, DATA_ROLL_PTR)
+ == trx_id_pos + 1);
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
+ if (!offsets) {
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+ } else {
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ }
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
@@ -890,9 +943,9 @@ row_upd_build_difference_binary(
/* NOTE: we compare the fields as binary strings!
(No collation) */
- if (i == trx_id_pos || i == roll_ptr_pos) {
+ if (no_sys && (i == trx_id_pos || i == trx_id_pos + 1)) {
- goto skip_compare;
+ continue;
}
if (!dfield_is_ext(dfield)
@@ -907,8 +960,6 @@ row_upd_build_difference_binary(
n_diff++;
}
-skip_compare:
- ;
}
update->n_fields = n_diff;
@@ -1386,9 +1437,9 @@ row_upd_changes_some_index_ord_field_binary(
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
-@return TRUE if the Doc ID column is changed */
+@return whether the Doc ID column is changed */
UNIV_INTERN
-ulint
+bool
row_upd_changes_doc_id(
/*===================*/
dict_table_t* table, /*!< in: table */
@@ -1431,61 +1482,6 @@ row_upd_changes_fts_column(
}
/***********************************************************//**
-Checks if an update vector changes the table's FTS-indexed columns.
-NOTE: must not be called for tables which do not have an FTS-index.
-Also, the vector returned must be explicitly freed as it's allocated
-using the ut_malloc() allocator.
-@return vector of FTS indexes that were affected by the update */
-UNIV_INTERN
-ib_vector_t*
-row_upd_changes_fts_columns(
-/*========================*/
- dict_table_t* table, /*!< in: table */
- upd_t* update) /*!< in: update vector for the row */
-{
- ulint i;
- ulint offset;
- fts_t* fts = table->fts;
- ib_vector_t* updated_fts_indexes = NULL;
-
- for (i = 0; i < upd_get_n_fields(update); ++i) {
- upd_field_t* upd_field = upd_get_nth_field(update, i);
-
- offset = row_upd_changes_fts_column(table, upd_field);
-
- if (offset != ULINT_UNDEFINED) {
-
- dict_index_t* index;
-
- /* TODO: Investigate if we can check whether the
- existing set of affected indexes matches the new
- affected set. If matched then we don't need to
- do the extra malloc()/free(). */
-
- /* This vector is created from the ut_malloc()
- allocator because we only want to keep one instance
- around not matter how many times this row is
- updated. The old entry should be deleted when
- we update the FTS row info with this new vector. */
- if (updated_fts_indexes == NULL) {
- ib_alloc_t* ut_alloc;
-
- ut_alloc = ib_ut_allocator_create();
-
- updated_fts_indexes = ib_vector_create(
- ut_alloc, sizeof(dict_index_t*), 2);
- }
-
- index = static_cast<dict_index_t*>(
- ib_vector_getp(fts->indexes, offset));
- ib_vector_push(updated_fts_indexes, &index);
- }
- }
-
- return(updated_fts_indexes);
-}
-
-/***********************************************************//**
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes.
@@ -1633,7 +1629,7 @@ row_upd_store_row(
}
node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- NULL, ext, node->heap);
+ NULL, NULL, NULL, ext, node->heap);
if (node->is_delete) {
node->upd_row = NULL;
node->upd_ext = NULL;
@@ -1652,8 +1648,8 @@ row_upd_store_row(
Updates a secondary index entry of a row.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_sec_index_entry(
/*====================*/
upd_node_t* node, /*!< in: row update node */
@@ -1667,11 +1663,13 @@ row_upd_sec_index_entry(
dict_index_t* index;
btr_cur_t* btr_cur;
ibool referenced;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
- ulint mode = BTR_MODIFY_LEAF;
+ ulint mode;
enum row_search_result search_result;
+ ut_ad(trx->id);
+
index = node->index;
referenced = row_upd_index_is_referenced(index, trx);
@@ -1682,19 +1680,74 @@ row_upd_sec_index_entry(
entry = row_build_index_entry(node->row, node->ext, index, heap);
ut_a(entry);
+ log_free_check();
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!trx->ddl) {
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
+ "before_row_upd_sec_index_entry");
+ }
+#endif /* UNIV_DEBUG */
+
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ /* This is a normal index. Do not log anything.
+ Perform the update on the index tree directly. */
+ break;
+ case ONLINE_INDEX_CREATION:
+ /* Log a DELETE and optionally INSERT. */
+ row_log_online_op(index, entry, 0);
+
+ if (!node->is_delete) {
+ mem_heap_empty(heap);
+ entry = row_build_index_entry(
+ node->upd_row, node->upd_ext,
+ index, heap);
+ ut_a(entry);
+ row_log_online_op(index, entry, trx->id);
+ }
+ /* fall through */
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ mtr_commit(&mtr);
+ goto func_exit;
+ }
+
+ /* We can only buffer delete-mark operations if there
+ are no foreign key constraints referring to the index. */
+ mode = referenced
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ | BTR_DELETE_MARK;
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ /* We can only buffer delete-mark operations if there
+ are no foreign key constraints referring to the index. */
+ mode = referenced
+ ? BTR_MODIFY_LEAF
+ : BTR_MODIFY_LEAF | BTR_DELETE_MARK;
+ }
+
/* Set the query thread, so that ibuf_insert_low() will be
able to invoke thd_get_trx(). */
btr_pcur_get_btr_cur(&pcur)->thr = thr;
- /* We can only try to use the insert/delete buffer to buffer
- delete-mark operations if the index we're modifying has no foreign
- key constraints referring to it. */
- if (!referenced) {
- mode |= BTR_DELETE_MARK;
- }
-
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
@@ -1711,6 +1764,20 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* When online CREATE INDEX copied the update
+ that we already made to the clustered index,
+ and completed the secondary index creation
+ before we got here, the old secondary index
+ record would not exist. The CREATE INDEX
+ should be waiting for a MySQL meta-data lock
+ upgrade at least until this UPDATE
+ returns. After that point, the
+ TEMP_INDEX_PREFIX would be dropped from the
+ index name in commit_inplace_alter_table(). */
+ break;
+ }
+
fputs("InnoDB: error in sec index entry update in\n"
"InnoDB: ", stderr);
dict_index_name_print(stderr, trx, index);
@@ -1730,11 +1797,9 @@ row_upd_sec_index_entry(
case ROW_FOUND:
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
- row_ins_index_entry below */
-
+ row_ins_sec_index_entry() below */
if (!rec_get_deleted_flag(
- rec, dict_table_is_comp(index->table))) {
-
+ rec, dict_table_is_comp(index->table))) {
err = btr_cur_del_mark_set_sec_rec(
0, btr_cur, TRUE, thr, &mtr);
@@ -1764,13 +1829,15 @@ row_upd_sec_index_entry(
goto func_exit;
}
+ mem_heap_empty(heap);
+
/* Build a new index entry */
entry = row_build_index_entry(node->upd_row, node->upd_ext,
index, heap);
ut_a(entry);
/* Insert new index entry */
- err = row_ins_index_entry(index, entry, 0, TRUE, thr);
+ err = row_ins_sec_index_entry(index, entry, thr);
func_exit:
mem_heap_free(heap);
@@ -1783,8 +1850,8 @@ Updates the secondary index record if it is changed in the row update or
deletes it if this is a delete.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_sec_step(
/*=============*/
upd_node_t* node, /*!< in: row update node */
@@ -1897,8 +1964,8 @@ fields of the clustered index record change. This should be quite rare in
database applications.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_rec_by_insert(
/*========================*/
upd_node_t* node, /*!< in/out: row update node */
@@ -1914,7 +1981,7 @@ row_upd_clust_rec_by_insert(
trx_t* trx;
dict_table_t* table;
dtuple_t* entry;
- ulint err;
+ dberr_t err;
ibool change_ownership = FALSE;
rec_t* rec;
ulint* offsets = NULL;
@@ -1939,7 +2006,7 @@ row_upd_clust_rec_by_insert(
default:
ut_error;
case UPD_NODE_INSERT_BLOB:
- /* A lock wait occurred in row_ins_index_entry() in
+ /* A lock wait occurred in row_ins_clust_index_entry() in
the previous invocation of this function. Mark the
off-page columns in the entry inherited. */
@@ -1948,7 +2015,7 @@ row_upd_clust_rec_by_insert(
ut_a(change_ownership);
/* fall through */
case UPD_NODE_INSERT_CLUSTERED:
- /* A lock wait occurred in row_ins_index_entry() in
+ /* A lock wait occurred in row_ins_clust_index_entry() in
the previous invocation of this function. */
break;
case UPD_NODE_UPDATE_CLUSTERED:
@@ -1961,8 +2028,8 @@ row_upd_clust_rec_by_insert(
ut_ad(page_rec_is_user_rec(rec));
err = btr_cur_del_mark_set_clust_rec(
- BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
- rec, index, offsets, TRUE, thr, mtr);
+ btr_cur_get_block(btr_cur), rec, index, offsets,
+ thr, mtr);
if (err != DB_SUCCESS) {
err_exit:
mtr_commit(mtr);
@@ -1999,9 +2066,9 @@ err_exit:
mtr_commit(mtr);
- err = row_ins_index_entry(index, entry,
- node->upd_ext ? node->upd_ext->n_ext : 0,
- TRUE, thr);
+ err = row_ins_clust_index_entry(
+ index, entry, thr,
+ node->upd_ext ? node->upd_ext->n_ext : 0);
node->state = change_ownership
? UPD_NODE_INSERT_BLOB
: UPD_NODE_INSERT_CLUSTERED;
@@ -2027,11 +2094,17 @@ err_exit:
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
btr_cur_disown_inherited_fields(
btr_cur_get_page_zip(btr_cur),
rec, index, offsets, node->update, mtr);
+ /* It is not necessary to call row_log_table for
+ this, because during online table rebuild, purge will
+ not free any BLOBs in the table, whether or not they
+ are owned by the clustered index record. */
+
mtr_commit(mtr);
}
@@ -2045,20 +2118,24 @@ Updates a clustered index record of a row when the ordering fields do
not change.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_rec(
/*==============*/
upd_node_t* node, /*!< in: row update node */
dict_index_t* index, /*!< in: clustered index */
+ ulint* offsets,/*!< in: rec_get_offsets() on node->pcur */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: memory heap, can be emptied */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr; gets committed here */
{
- mem_heap_t* heap = NULL;
- big_rec_t* big_rec = NULL;
+ mem_heap_t* heap = NULL;
+ big_rec_t* big_rec = NULL;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
+ const dtuple_t* rebuilt_old_pk = NULL;
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2066,33 +2143,48 @@ row_upd_clust_rec(
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ ut_ad(btr_cur_get_index(btr_cur) == index);
+ ut_ad(!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
+ ut_ad(rec_offs_validate(btr_cur_get_rec(btr_cur), index, offsets));
+
+ if (dict_index_is_online_ddl(index)) {
+ rebuilt_old_pk = row_log_table_get_pk(
+ btr_cur_get_rec(btr_cur), index, offsets, &heap);
+ }
/* Try optimistic updating of the record, keeping changes within
the page; we do not check locks because we assume the x-lock on the
record to update */
if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_update_in_place(
+ BTR_NO_LOCKING_FLAG, btr_cur,
+ offsets, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
} else {
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_optimistic_update(
+ BTR_NO_LOCKING_FLAG, btr_cur,
+ &offsets, offsets_heap, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
+ }
+
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_update(btr_cur_get_rec(btr_cur),
+ index, offsets, rebuilt_old_pk);
}
mtr_commit(mtr);
if (UNIV_LIKELY(err == DB_SUCCESS)) {
- return(DB_SUCCESS);
+ goto func_exit;
}
if (buf_LRU_buf_pool_running_out()) {
- return(DB_LOCK_TABLE_FULL);
+ err = DB_LOCK_TABLE_FULL;
+ goto func_exit;
}
/* We may have to modify the tree structure: do a pessimistic descent
down the index tree */
@@ -2110,14 +2202,16 @@ row_upd_clust_rec(
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(index->table)));
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+
err = btr_cur_pessimistic_update(
BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
- &heap, &big_rec, node->update, node->cmpl_info, thr, mtr);
+ &offsets, offsets_heap, heap, &big_rec,
+ node->update, node->cmpl_info,
+ thr, thr_get_trx(thr)->id, mtr);
if (big_rec) {
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec;
- rec_offs_init(offsets_);
-
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
columns while still x-latching
@@ -2140,12 +2234,10 @@ row_upd_clust_rec(
portion of the file, in case the file was somehow
truncated in the crash. */
- rec = btr_cur_get_rec(btr_cur);
DEBUG_SYNC_C("before_row_upd_extern");
err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(btr_cur), rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
+ index, btr_cur_get_block(btr_cur),
+ btr_cur_get_rec(btr_cur), offsets,
big_rec, mtr, BTR_STORE_UPDATE);
DEBUG_SYNC_C("after_row_upd_extern");
/* If writing big_rec fails (for example, because of
@@ -2164,9 +2256,14 @@ row_upd_clust_rec(
ut_a(err == DB_SUCCESS);
}
- mtr_commit(mtr);
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_update(btr_cur_get_rec(btr_cur),
+ index, offsets, rebuilt_old_pk);
+ }
- if (UNIV_LIKELY_NULL(heap)) {
+ mtr_commit(mtr);
+func_exit:
+ if (heap) {
mem_heap_free(heap);
}
@@ -2180,8 +2277,8 @@ row_upd_clust_rec(
/***********************************************************//**
Delete marks a clustered index record.
@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_del_mark_clust_rec(
/*=======================*/
upd_node_t* node, /*!< in: row update node */
@@ -2196,7 +2293,7 @@ row_upd_del_mark_clust_rec(
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2214,8 +2311,8 @@ row_upd_del_mark_clust_rec(
locks, because we assume that we have an x-lock on the record */
err = btr_cur_del_mark_set_clust_rec(
- BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr);
+ btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur),
+ index, offsets, thr, mtr);
if (err == DB_SUCCESS && referenced) {
/* NOTE that the following call loses the position of pcur ! */
@@ -2232,8 +2329,8 @@ row_upd_del_mark_clust_rec(
Updates the clustered index record.
@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
in case of a lock wait, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_step(
/*===============*/
upd_node_t* node, /*!< in: row update node */
@@ -2242,11 +2339,10 @@ row_upd_clust_step(
dict_index_t* index;
btr_pcur_t* pcur;
ibool success;
- ulint err;
- mtr_t* mtr;
- mtr_t mtr_buf;
+ dberr_t err;
+ mtr_t mtr;
rec_t* rec;
- mem_heap_t* heap = NULL;
+ mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets;
ibool referenced;
@@ -2259,9 +2355,8 @@ row_upd_clust_step(
pcur = node->pcur;
/* We have to restore the cursor to its position */
- mtr = &mtr_buf;
- mtr_start(mtr);
+ mtr_start(&mtr);
/* If the restoration does not succeed, then the same
transaction has deleted the record on which the cursor was,
@@ -2273,12 +2368,32 @@ row_upd_clust_step(
ut_a(pcur->rel_pos == BTR_PCUR_ON);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+ ulint mode;
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "innodb_row_upd_clust_step_enter");
+ }
+#endif /* UNIV_DEBUG */
+
+ if (dict_index_is_online_ddl(index)) {
+ ut_ad(node->table->id != DICT_INDEXES_ID);
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mode = BTR_MODIFY_LEAF;
+ }
+
+ success = btr_pcur_restore_position(mode, pcur, &mtr);
if (!success) {
err = DB_RECORD_NOT_FOUND;
- mtr_commit(mtr);
+ mtr_commit(&mtr);
return(err);
}
@@ -2289,18 +2404,20 @@ row_upd_clust_step(
if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
- dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
+ ut_ad(!dict_index_is_online_ddl(index));
- mtr_commit(mtr);
+ dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr);
- mtr_start(mtr);
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
- mtr);
+ &mtr);
if (!success) {
err = DB_ERROR;
- mtr_commit(mtr);
+ mtr_commit(&mtr);
return(err);
}
@@ -2315,7 +2432,7 @@ row_upd_clust_step(
0, btr_pcur_get_block(pcur),
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
- mtr_commit(mtr);
+ mtr_commit(&mtr);
goto exit_func;
}
}
@@ -2324,17 +2441,14 @@ row_upd_clust_step(
if (node->is_delete) {
err = row_upd_del_mark_clust_rec(
- node, index, offsets, thr, referenced, mtr);
+ node, index, offsets, thr, referenced, &mtr);
if (err == DB_SUCCESS) {
node->state = UPD_NODE_UPDATE_ALL_SEC;
node->index = dict_table_get_next_index(index);
}
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
+
+ goto exit_func;
}
/* If the update is made for MySQL, we already have the update vector
@@ -2348,13 +2462,11 @@ exit_func:
row_upd_eval_new_vals(node->update);
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
- return(row_upd_clust_rec(node, index, thr, mtr));
+ err = row_upd_clust_rec(
+ node, index, offsets, &heap, thr, &mtr);
+ goto exit_func;
}
row_upd_store_row(node);
@@ -2374,20 +2486,21 @@ exit_func:
externally! */
err = row_upd_clust_rec_by_insert(
- node, index, thr, referenced, mtr);
+ node, index, thr, referenced, &mtr);
if (err != DB_SUCCESS) {
- return(err);
+ goto exit_func;
}
node->state = UPD_NODE_UPDATE_ALL_SEC;
} else {
- err = row_upd_clust_rec(node, index, thr, mtr);
+ err = row_upd_clust_rec(
+ node, index, offsets, &heap, thr, &mtr);
if (err != DB_SUCCESS) {
- return(err);
+ goto exit_func;
}
node->state = UPD_NODE_UPDATE_SOME_SEC;
@@ -2395,6 +2508,10 @@ exit_func:
node->index = dict_table_get_next_index(index);
+exit_func:
+ if (heap) {
+ mem_heap_free(heap);
+ }
return(err);
}
@@ -2404,14 +2521,14 @@ to this node, we assume that we have a persistent cursor which was on a
record, and the position of the cursor is stored in the cursor.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd(
/*====*/
upd_node_t* node, /*!< in: row update node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(node && thr);
@@ -2449,6 +2566,17 @@ row_upd(
return(DB_SUCCESS);
}
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_upd_clust");
+ }
+#endif /* UNIV_DEBUG */
+
+ DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;);
+
do {
/* Skip corrupted index */
dict_table_skip_corrupt_index(node->index);
@@ -2458,7 +2586,6 @@ row_upd(
}
if (node->index->type != DICT_FTS) {
- log_free_check();
err = row_upd_sec_step(node, thr);
if (err != DB_SUCCESS) {
@@ -2500,7 +2627,7 @@ row_upd_step(
upd_node_t* node;
sel_node_t* sel_node;
que_node_t* parent;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
trx_t* trx;
ut_ad(thr);
@@ -2579,7 +2706,7 @@ row_upd_step(
err = row_upd(node, thr);
error_handling:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
return(NULL);
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index 0aad8675ff8..2c3191928fd 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,7 +114,6 @@ row_vers_impl_x_locked_low(
on rec. */
for (version = clust_rec;; version = prev_version) {
- ulint err;
row_ext_t* ext;
const dtuple_t* row;
dtuple_t* entry;
@@ -128,24 +127,22 @@ row_vers_impl_x_locked_low(
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(
+ trx_undo_prev_version_build(
clust_rec, mtr, version, clust_index, clust_offsets,
heap, &prev_version);
- /* Free version and clust_offsets. */
+ /* Free version and clust_offsets. */
mem_heap_free(old_heap);
if (prev_version == NULL) {
- /* clust_rec must be a fresh insert, because
+ /* clust_rec should be a fresh insert, because
no previous version was found or the transaction
has committed. The caller has to recheck as the
synopsis of this function states, whether trx_id
is active or not. */
- ut_a(err == DB_SUCCESS || err == DB_MISSING_HISTORY);
-
break;
}
@@ -155,15 +152,16 @@ row_vers_impl_x_locked_low(
vers_del = rec_get_deleted_flag(prev_version, comp);
- prev_trx_id = row_get_rec_trx_id(
- prev_version, clust_index, clust_offsets);
+ prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
+ clust_offsets);
/* The stack of versions is locked by mtr. Thus, it
is safe to fetch the prefixes for externally stored
columns. */
row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
- clust_offsets, NULL, &ext, heap);
+ clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
@@ -183,8 +181,6 @@ row_vers_impl_x_locked_low(
There is no guarantee that the transaction is still
active. */
- ut_ad(err == DB_SUCCESS);
-
/* We check if entry and rec are identified in the alphabetical
ordering */
@@ -355,7 +351,6 @@ row_vers_old_has_index_entry(
mem_heap_t* heap2;
const dtuple_t* row;
const dtuple_t* entry;
- ulint err;
ulint comp;
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
@@ -383,7 +378,8 @@ row_vers_old_has_index_entry(
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets, NULL, &ext, heap);
+ rec, clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset BLOB
@@ -420,12 +416,12 @@ row_vers_old_has_index_entry(
for (;;) {
heap2 = heap;
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
+ trx_undo_prev_version_build(rec, mtr, version,
+ clust_index, clust_offsets,
+ heap, &prev_version);
mem_heap_free(heap2); /* free version and clust_offsets */
- if (err != DB_SUCCESS || !prev_version) {
+ if (!prev_version) {
/* Versions end here */
mem_heap_free(heap);
@@ -444,7 +440,7 @@ row_vers_old_has_index_entry(
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, clust_offsets,
- NULL, &ext, heap);
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset
@@ -477,7 +473,7 @@ read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
-ulint
+dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -495,8 +491,9 @@ row_vers_build_for_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
+ rec_t** old_vers)/*!< out, own: old version, or NULL
+ if the history is missing or the record
+ does not exist in the view, that is,
it was freshly inserted afterwards */
{
const rec_t* version;
@@ -504,7 +501,7 @@ row_vers_build_for_consistent_read(
trx_id_t trx_id;
mem_heap_t* heap = NULL;
byte* buf;
- ulint err;
+ dberr_t err;
ut_ad(dict_index_is_clust(index));
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
@@ -558,27 +555,21 @@ row_vers_build_for_consistent_read(
rec_offs_make_valid(*old_vers, index,
*offsets);
err = DB_SUCCESS;
-
break;
}
}
err = trx_undo_prev_version_build(rec, mtr, version, index,
*offsets, heap,
- &prev_version);
+ &prev_version)
+ ? DB_SUCCESS : DB_MISSING_HISTORY;
if (heap2) {
mem_heap_free(heap2); /* free version */
}
- if (err != DB_SUCCESS) {
- break;
- }
-
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
- err = DB_SUCCESS;
-
break;
}
@@ -602,8 +593,6 @@ row_vers_build_for_consistent_read(
*old_vers = rec_copy(buf, prev_version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
break;
}
@@ -617,10 +606,9 @@ row_vers_build_for_consistent_read(
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+which should be seen by a semi-consistent read. */
UNIV_INTERN
-ulint
+void
row_vers_build_for_semi_consistent_read(
/*====================================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -644,7 +632,6 @@ row_vers_build_for_semi_consistent_read(
const rec_t* version;
mem_heap_t* heap = NULL;
byte* buf;
- ulint err;
trx_id_t rec_trx_id = 0;
ut_ad(dict_index_is_clust(index));
@@ -683,7 +670,7 @@ row_vers_build_for_semi_consistent_read(
mutex_exit(&trx_sys->mutex);
if (!version_trx) {
-
+committed_version_trx:
/* We found a version that belongs to a
committed transaction: return it. */
@@ -693,7 +680,6 @@ row_vers_build_for_semi_consistent_read(
if (rec == version) {
*old_vers = rec;
- err = DB_SUCCESS;
break;
}
@@ -721,30 +707,30 @@ row_vers_build_for_semi_consistent_read(
*old_vers = rec_copy(buf, version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
break;
}
+ DEBUG_SYNC_C("after_row_vers_check_trx_active");
+
heap2 = heap;
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
+ if (!trx_undo_prev_version_build(rec, mtr, version, index,
+ *offsets, heap,
+ &prev_version)) {
+ mem_heap_free(heap);
+ heap = heap2;
+ heap2 = NULL;
+ goto committed_version_trx;
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- break;
+ if (heap2) {
+ mem_heap_free(heap2); /* free version */
}
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
- err = DB_SUCCESS;
-
break;
}
@@ -759,6 +745,4 @@ row_vers_build_for_semi_consistent_read(
if (heap) {
mem_heap_free(heap);
}
-
- return(err);
}
diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc
index d5c949f3a06..820700a95a8 100644
--- a/storage/innobase/srv/srv0conc.cc
+++ b/storage/innobase/srv/srv0conc.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -40,7 +40,6 @@ Created 2011/04/18 Sunny Bains
#include "srv0srv.h"
#include "sync0sync.h"
#include "trx0trx.h"
-#include "ha_prototypes.h"
#include "mysql/plugin.h"
@@ -73,13 +72,11 @@ UNIV_INTERN ulong srv_thread_concurrency = 0;
/** This mutex protects srv_conc data structures */
static os_fast_mutex_t srv_conc_mutex;
-/** Slot for a thread waiting in the concurrency control queue. */
-typedef struct srv_conc_slot_struct srv_conc_slot_t;
-
/** Concurrency list node */
-typedef UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_node_t;
+typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t;
-struct srv_conc_slot_struct{
+/** Slot for a thread waiting in the concurrency control queue. */
+struct srv_conc_slot_t{
os_event_t event; /*!< event to wait */
ibool reserved; /*!< TRUE if slot
reserved */
@@ -106,10 +103,8 @@ UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
#endif /* !HAVE_ATOMIC_BUILTINS */
-typedef struct srv_conc_struct srv_conc_t;
-
/** Variables tracking the active and waiting threads. */
-struct srv_conc_struct {
+struct srv_conc_t {
char pad[64 - (sizeof(ulint) + sizeof(lint))];
/** Number of transactions that have declared_to_be_inside_innodb set.
@@ -148,7 +143,7 @@ srv_conc_init(void)
for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_conc_slot_t* conc_slot = &srv_conc_slots[i];
- conc_slot->event = os_event_create(NULL);
+ conc_slot->event = os_event_create();
ut_a(conc_slot->event);
}
#endif /* !HAVE_ATOMIC_BUILTINS */
@@ -224,9 +219,7 @@ srv_conc_enter_innodb_with_atomics(
(void) os_atomic_decrement_lint(
&srv_conc.n_waiting, 1);
- thd_wait_end(
- static_cast<THD*>(
- trx->mysql_thd));
+ thd_wait_end(trx->mysql_thd);
}
if (srv_adaptive_max_sleep_delay > 0) {
@@ -262,9 +255,7 @@ srv_conc_enter_innodb_with_atomics(
trx_search_latch_release_if_reserved(trx);
}
- thd_wait_begin(
- static_cast<THD*>(trx->mysql_thd),
- THD_WAIT_USER_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
notified_mysql = TRUE;
}
@@ -477,10 +468,10 @@ retry:
#endif /* UNIV_SYNC_DEBUG */
trx->op_info = "waiting in InnoDB queue";
- thd_wait_begin(static_cast<THD*>(trx->mysql_thd), THD_WAIT_USER_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
os_event_wait(slot->event);
- thd_wait_end(static_cast<THD*>(trx->mysql_thd));
+ thd_wait_end(trx->mysql_thd);
trx->op_info = "";
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 9c6e56bcb9d..3b3da2f070f 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +34,6 @@ Created 12/9/2009 Jimmy Yang
#include "trx0rseg.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
-#include "btr0cur.h"
#ifdef UNIV_NONINL
#include "srv0mon.ic"
#endif
@@ -215,11 +215,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST},
- {"buffer_pool_pages_in_flush", "buffer",
- "Number of pages in flush list",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAGE_INFLUSH},
-
{"buffer_pool_wait_free", "buffer",
"Number of times waited for free buffer"
" (innodb_buffer_pool_wait_free)",
@@ -259,12 +254,24 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA},
+ {"buffer_pool_bytes_data", "buffer",
+ "Buffer bytes containing data (innodb_buffer_pool_bytes_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA},
+
{"buffer_pool_pages_dirty", "buffer",
"Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)",
static_cast<monitor_type_t>(
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
+ {"buffer_pool_bytes_dirty", "buffer",
+ "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
+
{"buffer_pool_pages_free", "buffer",
"Buffer pages currently free (innodb_buffer_pool_pages_free)",
static_cast<monitor_type_t>(
@@ -350,25 +357,40 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
MONITOR_FLUSH_NEIGHBOR_PAGES},
- /* Cumulative counter for flush batches because of max_dirty */
- {"buffer_flush_max_dirty_total_pages", "buffer",
- "Total pages flushed as part of max_dirty batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE},
+ {"buffer_flush_n_to_flush_requested", "buffer",
+ "Number of pages requested for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
+
+ {"buffer_flush_avg_page_rate", "buffer",
+ "Average number of pages at which flushing is happening",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE},
+
+ {"buffer_flush_lsn_avg_rate", "buffer",
+ "Average redo generation rate",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE},
+
+ {"buffer_flush_pct_for_dirty", "buffer",
+ "Percent of IO capacity used to avoid max dirty page limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY},
- {"buffer_flush_max_dirty", "buffer",
- "Number of max_dirty batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT},
+ {"buffer_flush_pct_for_lsn", "buffer",
+ "Percent of IO capacity used to avoid reusable redo space limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN},
+
+ {"buffer_flush_sync_waits", "buffer",
+ "Number of times a wait happens due to sync flushing",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
- {"buffer_flush_max_dirty_pages", "buffer",
- "Pages queued as a max_dirty batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_PAGES},
- /* Cumulative counter for flush batches because of adaptive */
+ /* Cumulative counter for flush batches for adaptive flushing */
{"buffer_flush_adaptive_total_pages", "buffer",
- "Total pages flushed as part of adaptive batches",
+ "Total pages flushed as part of adaptive flushing",
MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT,
MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE},
@@ -382,22 +404,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
MONITOR_FLUSH_ADAPTIVE_PAGES},
- /* Cumulative counter for flush batches because of async */
- {"buffer_flush_async_total_pages", "buffer",
- "Total pages flushed as part of async batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE},
-
- {"buffer_flush_async", "buffer",
- "Number of async batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT},
-
- {"buffer_flush_async_pages", "buffer",
- "Pages queued as an async batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_PAGES},
-
/* Cumulative counter for flush batches because of sync */
{"buffer_flush_sync_total_pages", "buffer",
"Total pages flushed as part of sync batches",
@@ -859,6 +865,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS},
+ {"compression_pad_increments", "compression",
+ "Number of times padding is incremented to avoid compression failures",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS},
+
+ {"compression_pad_decrements", "compression",
+ "Number of times padding is decremented due to good compressibility",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+
/* ========== Counters for Index ========== */
{"module_index", "index", "Index Manager",
MONITOR_MODULE,
@@ -1130,11 +1146,26 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_MODULE,
MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS},
+ {"ddl_background_drop_indexes", "ddl",
+ "Number of indexes waiting to be dropped after failed index creation",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX},
+
{"ddl_background_drop_tables", "ddl",
"Number of tables in background drop table list",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE},
+ {"ddl_online_create_index", "ddl",
+ "Number of indexes being created online",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX},
+
+ {"ddl_pending_alter_table", "ddl",
+ "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
+
/* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
{"module_icp", "icp", "Index Condition Pushdown",
MONITOR_MODULE,
@@ -1171,6 +1202,34 @@ has been turned on/off. */
UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
- 1) / NUM_BITS_ULINT];
+#ifndef HAVE_ATOMIC_BUILTINS_64
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+ib_mutex_t monitor_mutex;
+
+/** Key to register monitor_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t monitor_mutex_key;
+
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void)
+/*================*/
+{
+ mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
+}
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void)
+/*==============*/
+{
+ mutex_free(&monitor_mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+
/****************************************************************//**
Get a monitor's "monitor_info" by its monitor id (index into the
innodb_counter_info array.
@@ -1359,13 +1418,14 @@ srv_mon_process_existing_counter(
mon_option_t set_option) /*!< in: Turn on/off reset the
counter */
{
- mon_type_t value;
- monitor_info_t* monitor_info;
- ibool update_min = FALSE;
- buf_pool_stat_t stat;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
+ mon_type_t value;
+ monitor_info_t* monitor_info;
+ ibool update_min = FALSE;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
monitor_info = srv_mon_get_info(monitor_id);
@@ -1381,7 +1441,7 @@ srv_mon_process_existing_counter(
/* export_vars.innodb_buffer_pool_reads. Num Reads from
disk (page not in buffer) */
case MONITOR_OVLD_BUF_POOL_READS:
- value = srv_buf_pool_reads;
+ value = srv_stats.buf_pool_reads;
break;
/* innodb_buffer_pool_read_requests, the number of logical
@@ -1394,12 +1454,12 @@ srv_mon_process_existing_counter(
/* innodb_buffer_pool_write_requests, the number of
write request */
case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST:
- value = srv_buf_pool_write_requests;
+ value = srv_stats.buf_pool_write_requests;
break;
/* innodb_buffer_pool_wait_free */
case MONITOR_OVLD_BUF_POOL_WAIT_FREE:
- value = srv_buf_pool_wait_free;
+ value = srv_stats.buf_pool_wait_free;
break;
/* innodb_buffer_pool_read_ahead */
@@ -1431,12 +1491,25 @@ srv_mon_process_existing_counter(
value = LRU_len;
break;
+ /* innodb_buffer_pool_bytes_data */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DATA:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+ break;
+
/* innodb_buffer_pool_pages_dirty */
case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY:
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
value = flush_list_len;
break;
+ /* innodb_buffer_pool_bytes_dirty */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.flush_list_bytes;
+ break;
+
/* innodb_buffer_pool_pages_free */
case MONITOR_OVLD_BUF_POOL_PAGES_FREE:
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
@@ -1463,12 +1536,12 @@ srv_mon_process_existing_counter(
/* innodb_data_reads, the total number of data reads */
case MONITOR_OVLD_BYTE_READ:
- value = srv_data_read;
+ value = srv_stats.data_read;
break;
/* innodb_data_writes, the total number of data writes. */
case MONITOR_OVLD_BYTE_WRITTEN:
- value = srv_data_written;
+ value = srv_stats.data_written;
break;
/* innodb_data_reads, the total number of data reads. */
@@ -1488,7 +1561,7 @@ srv_mon_process_existing_counter(
/* innodb_os_log_written */
case MONITOR_OVLD_OS_LOG_WRITTEN:
- value = (mon_type_t) srv_os_log_written;
+ value = (mon_type_t) srv_stats.os_log_written;
break;
/* innodb_os_log_fsyncs */
@@ -1504,33 +1577,33 @@ srv_mon_process_existing_counter(
/* innodb_os_log_pending_writes */
case MONITOR_OVLD_OS_LOG_PENDING_WRITES:
- value = srv_os_log_pending_writes;
+ value = srv_stats.os_log_pending_writes;
update_min = TRUE;
break;
/* innodb_log_waits */
case MONITOR_OVLD_LOG_WAITS:
- value = srv_log_waits;
+ value = srv_stats.log_waits;
break;
/* innodb_log_write_requests */
case MONITOR_OVLD_LOG_WRITE_REQUEST:
- value = srv_log_write_requests;
+ value = srv_stats.log_write_requests;
break;
/* innodb_log_writes */
case MONITOR_OVLD_LOG_WRITES:
- value = srv_log_writes;
+ value = srv_stats.log_writes;
break;
/* innodb_dblwr_writes */
case MONITOR_OVLD_SRV_DBLWR_WRITES:
- value = srv_dblwr_writes;
+ value = srv_stats.dblwr_writes;
break;
/* innodb_dblwr_pages_written */
case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
- value = srv_dblwr_pages_written;
+ value = srv_stats.dblwr_pages_written;
break;
/* innodb_page_size */
@@ -1539,27 +1612,27 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS:
- value = rw_s_spin_wait_count;
+ value = rw_lock_stats.rw_s_spin_wait_count;
break;
case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS:
- value = rw_x_os_wait_count;
+ value = rw_lock_stats.rw_x_os_wait_count;
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
- value = rw_s_spin_round_count;
+ value = rw_lock_stats.rw_s_spin_round_count;
break;
case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS:
- value = rw_x_spin_round_count;
+ value = rw_lock_stats.rw_x_spin_round_count;
break;
case MONITOR_OVLD_RWLOCK_S_OS_WAITS:
- value = rw_s_os_wait_count;
+ value = rw_lock_stats.rw_s_os_wait_count;
break;
case MONITOR_OVLD_RWLOCK_X_OS_WAITS:
- value = rw_x_os_wait_count;
+ value = rw_lock_stats.rw_x_os_wait_count;
break;
case MONITOR_OVLD_BUFFER_POOL_SIZE:
@@ -1568,44 +1641,44 @@ srv_mon_process_existing_counter(
/* innodb_rows_read */
case MONITOR_OLVD_ROW_READ:
- value = srv_n_rows_read;
+ value = srv_stats.n_rows_read;
break;
/* innodb_rows_inserted */
case MONITOR_OLVD_ROW_INSERTED:
- value = srv_n_rows_inserted;
+ value = srv_stats.n_rows_inserted;
break;
/* innodb_rows_deleted */
case MONITOR_OLVD_ROW_DELETED:
- value = srv_n_rows_deleted;
+ value = srv_stats.n_rows_deleted;
break;
/* innodb_rows_updated */
case MONITOR_OLVD_ROW_UPDTATED:
- value = srv_n_rows_updated;
+ value = srv_stats.n_rows_updated;
break;
/* innodb_row_lock_current_waits */
case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT:
- value = srv_n_lock_wait_current_count;
+ value = srv_stats.n_lock_wait_current_count;
break;
/* innodb_row_lock_time */
case MONITOR_OVLD_LOCK_WAIT_TIME:
- value = srv_n_lock_wait_time / 1000;
+ value = srv_stats.n_lock_wait_time / 1000;
break;
/* innodb_row_lock_time_max */
case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
- value = srv_n_lock_max_wait_time / 1000;
+ value = lock_sys->n_lock_max_wait_time / 1000;
break;
/* innodb_row_lock_time_avg */
case MONITOR_OVLD_LOCK_AVG_WAIT_TIME:
- if (srv_n_lock_wait_count > 0) {
- value = srv_n_lock_wait_time / 1000
- / srv_n_lock_wait_count;
+ if (srv_stats.n_lock_wait_count > 0) {
+ value = srv_stats.n_lock_wait_time / 1000
+ / srv_stats.n_lock_wait_count;
} else {
value = 0;
}
@@ -1613,7 +1686,7 @@ srv_mon_process_existing_counter(
/* innodb_row_lock_waits */
case MONITOR_OVLD_ROW_LOCK_WAIT:
- value = srv_n_lock_wait_count;
+ value = srv_stats.n_lock_wait_count;
break;
case MONITOR_RSEG_HISTORY_LEN:
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index e64cc006f02..5c0ca903417 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -59,6 +59,7 @@ Created 10/8/1995 Heikki Tuuri
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
+#include "dict0stats_bg.h" /* dict_stats_event */
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"
@@ -70,10 +71,6 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
-/* The following counter is incremented whenever there is some user activity
-in the server */
-UNIV_INTERN ulint srv_activity_count = 0;
-
/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
@@ -86,6 +83,8 @@ UNIV_INTERN ibool srv_error_monitor_active = FALSE;
UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
+UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
+
UNIV_INTERN const char* srv_main_thread_op_info = "";
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
@@ -104,6 +103,9 @@ UNIV_INTERN char* srv_undo_dir = NULL;
/** The number of tablespaces to use for rollback segments. */
UNIV_INTERN ulong srv_undo_tablespaces = 8;
+/** The number of UNDO tablespaces that are open and ready to use. */
+UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
+
/* The number of rollback segments to use */
UNIV_INTERN ulong srv_undo_logs = 1;
@@ -111,6 +113,10 @@ UNIV_INTERN ulong srv_undo_logs = 1;
UNIV_INTERN char* srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+UNIV_INTERN my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
UNIV_INTERN my_bool srv_file_per_table;
@@ -128,6 +134,10 @@ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+/** Sort buffer size in index creation */
+UNIV_INTERN ulong srv_sort_buf_size = 1048576;
+/** Maximum modification log file size for online index creation */
+UNIV_INTERN unsigned long long srv_online_max_size;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -170,15 +180,16 @@ the user from forgetting the 'newraw' keyword to my.cnf */
UNIV_INTERN ibool srv_created_new_raw = FALSE;
-UNIV_INTERN char** srv_log_group_home_dirs = NULL;
+UNIV_INTERN char* srv_log_group_home_dir = NULL;
-UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
-UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
+UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
/* size in database pages */
UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
+UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
/* size in database pages */
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
+UNIV_INTERN uint srv_flush_log_at_timeout = 1;
UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
@@ -211,7 +222,7 @@ UNIV_INTERN ulong srv_n_page_hash_locks = 16;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
/** whether or not to flush neighbors of a block */
-UNIV_INTERN my_bool srv_flush_neighbors = TRUE;
+UNIV_INTERN ulong srv_flush_neighbors = 1;
/* previously requested size */
UNIV_INTERN ulint srv_buf_pool_old_size;
/* current size in kilobytes */
@@ -256,7 +267,8 @@ UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
UNIV_INTERN ulint srv_max_n_open_files = 300;
/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 400;
+UNIV_INTERN ulong srv_io_capacity = 200;
+UNIV_INTERN ulong srv_max_io_capacity = 400;
/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
@@ -264,76 +276,49 @@ the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */
UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
+UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50;
+
+/* This is the percentage of log capacity at which adaptive flushing,
+if enabled, will kick in. */
+UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10;
+
+/* Number of iterations over which adaptive flushing is averaged. */
+UNIV_INTERN ulong srv_flushing_avg_loops = 30;
/* The number of purge threads to use.*/
-UNIV_INTERN ulong srv_n_purge_threads = 1;
+UNIV_INTERN ulong srv_n_purge_threads = 1;
/* the number of pages to purge in one batch */
-UNIV_INTERN ulong srv_purge_batch_size = 20;
-
-/* variable counts amount of data read in total (in bytes) */
-UNIV_INTERN ulint srv_data_read = 0;
+UNIV_INTERN ulong srv_purge_batch_size = 20;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
-ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-
-/* here we count the amount of data written in total (in bytes) */
-UNIV_INTERN ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-UNIV_INTERN ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-UNIV_INTERN ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-UNIV_INTERN lsn_t srv_os_log_written = 0;
+UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-/* amount of writes being done to the log files */
-UNIV_INTERN ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-UNIV_INTERN ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-UNIV_INTERN ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-UNIV_INTERN ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-UNIV_INTERN ulint srv_buf_pool_flushed = 0;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-UNIV_INTERN ulint srv_buf_pool_reads = 0;
+UNIV_INTERN srv_stats_t srv_stats;
/* structure to pass status variables to MySQL */
-UNIV_INTERN export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-UNIV_INTERN ulint srv_force_recovery = 0;
+UNIV_INTERN export_var_t export_vars;
+
+/** Normally 0. When nonzero, skip some phases of crash recovery,
+starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
+by SELECT or mysqldump. When this is nonzero, we do not allow any user
+modifications to the data. */
+UNIV_INTERN ulong srv_force_recovery;
+#ifndef DBUG_OFF
+/** Inject a crash at different steps of the recovery process.
+This is for testing and debugging only. */
+UNIV_INTERN ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
/** Print all user-level transactions deadlocks to mysqld stderr */
UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
+/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
+UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
+
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
@@ -350,7 +335,9 @@ this many index pages, there are 2 ways to calculate statistics:
* quick transient stats, that are used if persistent stats for the given
table/index are not found in the innodb database */
UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
+UNIV_INTERN my_bool srv_stats_persistent = TRUE;
UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
+UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
@@ -375,11 +362,6 @@ UNIV_INTERN ibool srv_print_log_io = FALSE;
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */
-UNIV_INTERN ulint srv_n_rows_inserted = 0;
-UNIV_INTERN ulint srv_n_rows_updated = 0;
-UNIV_INTERN ulint srv_n_rows_deleted = 0;
-UNIV_INTERN ulint srv_n_rows_read = 0;
-
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
@@ -404,58 +386,58 @@ UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
UNIV_INTERN time_t srv_last_monitor_time;
-UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
+UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex;
-/* Mutex for locking srv_monitor_file */
-UNIV_INTERN mutex_t srv_monitor_file_mutex;
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
#ifdef UNIV_PFS_MUTEX
# ifndef HAVE_ATOMIC_BUILTINS
/* Key to register server_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t server_mutex_key;
# endif /* !HAVE_ATOMIC_BUILTINS */
-/* Key to register srv_innodb_monitor_mutex with performance schema */
+/** Key to register srv_innodb_monitor_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-/* Key to register srv_monitor_file_mutex with performance schema */
+/** Key to register srv_monitor_file_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
-/* Key to register srv_dict_tmpfile_mutex with performance schema */
+/** Key to register srv_dict_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-/* Key to register the mutex with performance schema */
+/** Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-/* Key to register srv_sys_t::mutex with performance schema */
+/** Key to register srv_sys_t::mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
-/* Key to register srv_sys_t::tasks_mutex with performance schema */
+/** Key to register srv_sys_t::tasks_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/* Temporary file for innodb monitor output */
+/** Temporary file for innodb monitor output */
UNIV_INTERN FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
+/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
+UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
+/** Temporary file for output from the data dictionary */
UNIV_INTERN FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
+/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
+UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
+/** Temporary file for miscellanous diagnostic output */
UNIV_INTERN FILE* srv_misc_tmpfile;
UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;
-/* The following count work done by srv_master_thread. */
+/* The following counts are used by the srv_master_thread. */
-/* Iterations of the loop bounded by 'srv_active' label. */
-static ulint srv_main_active_loops = 0;
-/* Iterations of the loop bounded by the 'srv_idle' label. */
-static ulint srv_main_idle_loops = 0;
-/* Iterations of the loop bounded by the 'srv_shutdown' label. */
-static ulint srv_main_shutdown_loops = 0;
-/* Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
+/** Iterations of the loop bounded by 'srv_active' label. */
+static ulint srv_main_active_loops = 0;
+/** Iterations of the loop bounded by the 'srv_idle' label. */
+static ulint srv_main_idle_loops = 0;
+/** Iterations of the loop bounded by the 'srv_shutdown' label. */
+static ulint srv_main_shutdown_loops = 0;
+/** Log writes involving flush. */
+static ulint srv_log_writes_and_flush = 0;
/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
@@ -484,7 +466,8 @@ current_time % 5 != 0. */
} while (0)
/** Test if the system mutex is owned. */
-#define srv_sys_mutex_own() mutex_own(&srv_sys->mutex)
+#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) \
+ && !srv_read_only_mode)
/** Release the system mutex. */
#define srv_sys_mutex_exit() do { \
@@ -492,7 +475,7 @@ current_time % 5 != 0. */
} while (0)
#define fetch_lock_wait_timeout(trx) \
- ((trx)->lock.allowed_to_wait \
+ ((trx)->lock.allowed_to_wait \
? thd_lock_wait_timeout((trx)->mysql_thd) \
: 0)
@@ -568,35 +551,32 @@ suspending the master thread and utility threads when they have nothing
to do. The thread table can be seen as an analogue to the process table
in a traditional Unix implementation. */
-/** The server system */
-typedef struct srv_sys_struct srv_sys_t;
-
/** The server system struct */
-struct srv_sys_struct{
- mutex_t tasks_mutex; /*!< variable protecting the
+struct srv_sys_t{
+ ib_mutex_t tasks_mutex; /*!< variable protecting the
tasks queue */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue */
- mutex_t mutex; /*!< variable protecting the
-
+ ib_mutex_t mutex; /*!< variable protecting the
fields below. */
ulint n_sys_threads; /*!< size of the sys_threads
array */
- srv_table_t* sys_threads; /*!< server thread table */
+ srv_slot_t* sys_threads; /*!< server thread table */
ulint n_threads_active[SRV_MASTER + 1];
/*!< number of threads active
in a thread class */
- ulint activity_count; /*!< For tracking server
+ srv_stats_t::ulint_ctr_1_t
+ activity_count; /*!< For tracking server
activity */
};
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
-UNIV_INTERN mutex_t server_mutex;
+UNIV_INTERN ib_mutex_t server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */
static srv_sys_t* srv_sys = NULL;
@@ -656,6 +636,18 @@ srv_set_io_thread_op_info(
srv_io_thread_op_info[i] = str;
}
+/*********************************************************************//**
+Resets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info()
+/*=========================*/
+{
+ for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
+ srv_io_thread_op_info[i] = "not started yet";
+ }
+}
+
#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the type of a thread table slot.
@@ -756,6 +748,8 @@ srv_suspend_thread_low(
/*===================*/
srv_slot_t* slot) /*!< in/out: thread slot */
{
+
+ ut_ad(!srv_read_only_mode);
ut_ad(srv_sys_mutex_own());
ut_ad(slot->in_use);
@@ -915,9 +909,8 @@ void
srv_init(void)
/*==========*/
{
- ulint i;
- ulint srv_sys_sz;
- ulint n_sys_threads;
+ ulint n_sys_threads = 0;
+ ulint srv_sys_sz = sizeof(*srv_sys);
#ifndef HAVE_ATOMIC_BUILTINS
mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
@@ -926,38 +919,55 @@ srv_init(void)
mutex_create(srv_innodb_monitor_mutex_key,
&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
- /* Number of purge threads + master thread */
- n_sys_threads = srv_n_purge_threads + 1;
+ if (!srv_read_only_mode) {
- srv_sys_sz = sizeof(*srv_sys) + (n_sys_threads * sizeof(srv_slot_t));
+ /* Number of purge threads + master thread */
+ n_sys_threads = srv_n_purge_threads + 1;
+
+ srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
+ }
srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
- mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
+ srv_sys->n_sys_threads = n_sys_threads;
- mutex_create(srv_sys_tasks_mutex_key,
- &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
+ if (!srv_read_only_mode) {
- srv_sys->n_sys_threads = n_sys_threads;
- srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
+ mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
- for (i = 0; i < srv_sys->n_sys_threads; i++) {
- srv_slot_t* slot;
+ mutex_create(srv_sys_tasks_mutex_key,
+ &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
- slot = srv_sys->sys_threads + i;
+ srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
- slot->event = os_event_create(NULL);
+ for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
+ srv_slot_t* slot = &srv_sys->sys_threads[i];
- ut_a(slot->event);
- }
+ slot->event = os_event_create();
+
+ ut_a(slot->event);
+ }
+
+ srv_error_event = os_event_create();
- srv_error_event = os_event_create(NULL);
+ srv_monitor_event = os_event_create();
- srv_monitor_event = os_event_create(NULL);
+ srv_buf_dump_event = os_event_create();
- srv_buf_dump_event = os_event_create("buf_dump_event");
+ UT_LIST_INIT(srv_sys->tasks);
+ }
+
+ /* page_zip_stat_per_index_mutex is acquired from:
+ 1. page_zip_compress() (after SYNC_FSP)
+ 2. page_zip_decompress()
+ 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
+ 4. innodb_cmp_per_index_update(), no other latches
+ since we do not acquire any other latches while holding this mutex,
+ it can have very low level. We pick SYNC_ANY_LATCH for it. */
- UT_LIST_INIT(srv_sys->tasks);
+ mutex_create(
+ page_zip_stat_per_index_mutex_key,
+ &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
/* Create dummy indexes for infimum and supremum records */
@@ -987,8 +997,10 @@ srv_free(void)
trx_i_s_cache_free(trx_i_s_cache);
- os_event_free(srv_buf_dump_event);
- srv_buf_dump_event = NULL;
+ if (!srv_read_only_mode) {
+ os_event_free(srv_buf_dump_event);
+ srv_buf_dump_event = NULL;
+ }
}
/*********************************************************************//**
@@ -1010,10 +1022,9 @@ srv_general_init(void)
}
/*********************************************************************//**
-Normalizes init parameter values to use units we use inside InnoDB.
-@return DB_SUCCESS or error code */
+Normalizes init parameter values to use units we use inside InnoDB. */
static
-ulint
+void
srv_normalize_init_values(void)
/*===========================*/
{
@@ -1035,28 +1046,19 @@ srv_normalize_init_values(void)
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
-
- return(DB_SUCCESS);
}
/*********************************************************************//**
-Boots the InnoDB server.
-@return DB_SUCCESS or error code */
+Boots the InnoDB server. */
UNIV_INTERN
-ulint
+void
srv_boot(void)
/*==========*/
{
- ulint err;
-
/* Transform the init parameter values given by MySQL to
use units we use inside InnoDB: */
- err = srv_normalize_init_values();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
+ srv_normalize_init_values();
/* Initialize synchronization primitives, memory management, and thread
local storage */
@@ -1066,8 +1068,7 @@ srv_boot(void)
/* Initialize this module */
srv_init();
-
- return(DB_SUCCESS);
+ srv_mon_create();
}
/******************************************************************//**
@@ -1090,10 +1091,10 @@ srv_refresh_innodb_monitor_stats(void)
buf_refresh_io_stats_all();
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
mutex_exit(&srv_innodb_monitor_mutex);
}
@@ -1158,7 +1159,7 @@ srv_printf_innodb_monitor(
mutex_enter(&dict_foreign_err_mutex);
- if (ftell(dict_foreign_err_file) != 0L) {
+ if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
fputs("------------------------\n"
"LATEST FOREIGN KEY ERROR\n"
"------------------------\n", file);
@@ -1271,26 +1272,26 @@ srv_printf_innodb_monitor(
"Number of rows inserted " ULINTPF
", updated " ULINTPF ", deleted " ULINTPF
", read " ULINTPF "\n",
- srv_n_rows_inserted,
- srv_n_rows_updated,
- srv_n_rows_deleted,
- srv_n_rows_read);
+ (ulint) srv_stats.n_rows_inserted,
+ (ulint) srv_stats.n_rows_updated,
+ (ulint) srv_stats.n_rows_deleted,
+ (ulint) srv_stats.n_rows_read);
fprintf(file,
"%.2f inserts/s, %.2f updates/s,"
" %.2f deletes/s, %.2f reads/s\n",
- (srv_n_rows_inserted - srv_n_rows_inserted_old)
+ ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
/ time_elapsed,
- (srv_n_rows_updated - srv_n_rows_updated_old)
+ ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
/ time_elapsed,
- (srv_n_rows_deleted - srv_n_rows_deleted_old)
+ ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
/ time_elapsed,
- (srv_n_rows_read - srv_n_rows_read_old)
+ ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
/ time_elapsed);
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
fputs("----------------------------\n"
"END OF INNODB MONITOR OUTPUT\n"
@@ -1308,89 +1309,168 @@ void
srv_export_innodb_status(void)
/*==========================*/
{
- buf_pool_stat_t stat;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
buf_get_total_stat(&stat);
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
mutex_enter(&srv_innodb_monitor_mutex);
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
+ export_vars.innodb_data_pending_reads =
+ os_n_pending_reads;
+
+ export_vars.innodb_data_pending_writes =
+ os_n_pending_writes;
+
+ export_vars.innodb_data_pending_fsyncs =
+ fil_n_pending_log_flushes
+ fil_n_pending_tablespace_flushes;
+
export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
+
+ export_vars.innodb_data_read = srv_stats.data_read;
+
export_vars.innodb_data_reads = os_n_file_reads;
+
export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
+
+ export_vars.innodb_data_written = srv_stats.data_written;
+
export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd
- = stat.n_ra_pages_read_rnd;
- export_vars.innodb_buffer_pool_read_ahead
- = stat.n_ra_pages_read;
- export_vars.innodb_buffer_pool_read_ahead_evicted
- = stat.n_ra_pages_evicted;
+
+ export_vars.innodb_buffer_pool_write_requests =
+ srv_stats.buf_pool_write_requests;
+
+ export_vars.innodb_buffer_pool_wait_free =
+ srv_stats.buf_pool_wait_free;
+
+ export_vars.innodb_buffer_pool_pages_flushed =
+ srv_stats.buf_pool_flushed;
+
+ export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
+
+ export_vars.innodb_buffer_pool_read_ahead_rnd =
+ stat.n_ra_pages_read_rnd;
+
+ export_vars.innodb_buffer_pool_read_ahead =
+ stat.n_ra_pages_read;
+
+ export_vars.innodb_buffer_pool_read_ahead_evicted =
+ stat.n_ra_pages_evicted;
+
export_vars.innodb_buffer_pool_pages_data = LRU_len;
+
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+
export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+
+ export_vars.innodb_buffer_pool_bytes_dirty =
+ buf_pools_list_size.flush_list_bytes;
+
export_vars.innodb_buffer_pool_pages_free = free_len;
+
#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
+ export_vars.innodb_buffer_pool_pages_latched =
+ buf_get_latched_pages_number();
#endif /* UNIV_DEBUG */
export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
- export_vars.innodb_buffer_pool_pages_misc
- = buf_pool_get_n_pages() - LRU_len - free_len;
+ export_vars.innodb_buffer_pool_pages_misc =
+ buf_pool_get_n_pages() - LRU_len - free_len;
+
#ifdef HAVE_ATOMIC_BUILTINS
export_vars.innodb_have_atomic_builtins = 1;
#else
export_vars.innodb_have_atomic_builtins = 0;
#endif
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
+
+ export_vars.innodb_log_waits = srv_stats.log_waits;
+
+ export_vars.innodb_os_log_written = srv_stats.os_log_written;
+
export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
+
export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
+
+ export_vars.innodb_os_log_pending_writes =
+ srv_stats.os_log_pending_writes;
+
+ export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
+
+ export_vars.innodb_log_writes = srv_stats.log_writes;
+
+ export_vars.innodb_dblwr_pages_written =
+ srv_stats.dblwr_pages_written;
+
+ export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
+
export_vars.innodb_pages_created = stat.n_pages_created;
+
export_vars.innodb_pages_read = stat.n_pages_read;
+
export_vars.innodb_pages_written = stat.n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
- if (srv_n_lock_wait_count > 0) {
+
+ export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
+
+ export_vars.innodb_row_lock_current_waits =
+ srv_stats.n_lock_wait_current_count;
+
+ export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
+
+ if (srv_stats.n_lock_wait_count > 0) {
+
export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
+ (srv_stats.n_lock_wait_time
+ / 1000 / srv_stats.n_lock_wait_count);
+
} else {
export_vars.innodb_row_lock_time_avg = 0;
}
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
+
+ export_vars.innodb_row_lock_time_max =
+ lock_sys->n_lock_max_wait_time / 1000;
+
+ export_vars.innodb_rows_read = srv_stats.n_rows_read;
+
+ export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
+
+ export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
+
+ export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
+
export_vars.innodb_num_open_files = fil_n_file_opened;
- export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
+
+ export_vars.innodb_truncated_status_writes =
+ srv_truncated_status_writes;
+
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
+#ifdef UNIV_DEBUG
+ if (purge_sys->done.trx_no == 0
+ || trx_sys->rw_max_trx_id < purge_sys->done.trx_no - 1) {
+ export_vars.innodb_purge_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_trx_id_age =
+ trx_sys->rw_max_trx_id - purge_sys->done.trx_no + 1;
+ }
+
+ if (!purge_sys->view
+ || trx_sys->rw_max_trx_id < purge_sys->view->up_limit_id) {
+ export_vars.innodb_purge_view_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_view_trx_id_age =
+ trx_sys->rw_max_trx_id - purge_sys->view->up_limit_id;
+ }
+#endif /* UNIV_DEBUG */
+
mutex_exit(&srv_innodb_monitor_mutex);
}
@@ -1414,14 +1494,16 @@ DECLARE_THREAD(srv_monitor_thread)(
ulint mutex_skipped;
ibool last_srv_print_monitor;
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Lock timeout thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_monitor_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_monitor_active = TRUE;
UT_NOT_USED(arg);
@@ -1470,7 +1552,10 @@ loop:
}
- if (srv_innodb_status) {
+ /* We don't create the temp files or associated
+ mutexes in read-only-mode */
+
+ if (!srv_read_only_mode && srv_innodb_status) {
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
if (!srv_printf_innodb_monitor(srv_monitor_file,
@@ -1587,16 +1672,18 @@ DECLARE_THREAD(srv_error_monitor_thread)(
const void* sema = NULL;
const void* old_sema = NULL;
+ ut_ad(!srv_read_only_mode);
+
old_lsn = srv_start_lsn;
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Error monitor thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_error_monitor_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_error_monitor_active = TRUE;
loop:
@@ -1630,9 +1717,6 @@ loop:
eviction policy. */
buf_LRU_stat_update();
- /* Update the statistics collected for flush rate policy. */
- buf_flush_stat_update();
-
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
@@ -1690,7 +1774,7 @@ void
srv_inc_activity_count(void)
/*========================*/
{
- ++srv_sys->activity_count;
+ srv_sys->activity_count.inc();
}
/**********************************************************************//**
@@ -1703,12 +1787,15 @@ srv_thread_type
srv_get_active_thread_type(void)
/*============================*/
{
- ulint i;
srv_thread_type ret = SRV_NONE;
+ if (srv_read_only_mode) {
+ return(SRV_NONE);
+ }
+
srv_sys_mutex_enter();
- for (i = SRV_WORKER; i <= SRV_MASTER; ++i) {
+ for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
if (srv_sys->n_threads_active[i] != 0) {
ret = static_cast<srv_thread_type>(i);
break;
@@ -1720,6 +1807,7 @@ srv_get_active_thread_type(void)
/* Check only on shutdown. */
if (ret == SRV_NONE
&& srv_shutdown_state != SRV_SHUTDOWN_NONE
+ && trx_purge_state() != PURGE_STATE_DISABLED
&& trx_purge_state() != PURGE_STATE_EXIT) {
ret = SRV_PURGE;
@@ -1739,20 +1827,25 @@ srv_any_background_threads_are_active(void)
{
const char* thread_active = NULL;
- if (srv_error_monitor_active) {
+ if (srv_read_only_mode) {
+ return(NULL);
+ } else if (srv_error_monitor_active) {
thread_active = "srv_error_monitor_thread";
- } else if (srv_lock_timeout_active) {
+ } else if (lock_sys->timeout_thread_active) {
thread_active = "srv_lock_timeout thread";
} else if (srv_monitor_active) {
thread_active = "srv_monitor_thread";
} else if (srv_buf_dump_thread_active) {
thread_active = "buf_dump_thread";
+ } else if (srv_dict_stats_thread_active) {
+ thread_active = "dict_stats_thread";
}
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
- os_event_set(srv_timeout_event);
os_event_set(srv_buf_dump_event);
+ os_event_set(lock_sys->timeout_event);
+ os_event_set(dict_stats_event);
return(thread_active);
}
@@ -1768,6 +1861,10 @@ void
srv_active_wake_master_thread(void)
/*===============================*/
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
ut_ad(!srv_sys_mutex_own());
srv_inc_activity_count();
@@ -1869,7 +1966,8 @@ srv_sync_log_buffer_in_background(void)
time_t current_time = time(NULL);
srv_main_thread_op_info = "flushing log";
- if (difftime(current_time, srv_last_log_flush_time) >= 1) {
+ if (difftime(current_time, srv_last_log_flush_time)
+ >= srv_flush_log_at_timeout) {
log_buffer_sync_in_background(TRUE);
srv_last_log_flush_time = current_time;
srv_log_writes_and_flush++;
@@ -1986,7 +2084,7 @@ srv_master_do_active_tasks(void)
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
counter_time = ut_time_us(NULL);
- ibuf_contract_in_background(FALSE);
+ ibuf_contract_in_background(0, FALSE);
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
@@ -2078,7 +2176,7 @@ srv_master_do_idle_tasks(void)
/* Do an ibuf merge */
counter_time = ut_time_us(NULL);
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_in_background(TRUE);
+ ibuf_contract_in_background(0, TRUE);
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
@@ -2125,6 +2223,8 @@ srv_master_do_shutdown_tasks(
ulint n_bytes_merged = 0;
ulint n_tables_to_drop = 0;
+ ut_ad(!srv_read_only_mode);
+
++srv_main_shutdown_loops;
ut_a(srv_shutdown_state > 0);
@@ -2152,7 +2252,7 @@ srv_master_do_shutdown_tasks(
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
- n_bytes_merged = ibuf_contract_in_background(TRUE);
+ n_bytes_merged = ibuf_contract_in_background(0, TRUE);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
@@ -2200,14 +2300,16 @@ DECLARE_THREAD(srv_master_thread)(
ulint old_activity_count = srv_get_activity_count();
ib_time_t last_print_time;
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Master thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_master_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
@@ -2300,6 +2402,7 @@ srv_task_execute(void)
{
que_thr_t* thr = NULL;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
mutex_enter(&srv_sys->tasks_mutex);
@@ -2338,6 +2441,7 @@ DECLARE_THREAD(srv_worker_thread)(
{
srv_slot_t* slot;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
#ifdef UNIV_DEBUG_THREAD_CREATION
@@ -2418,6 +2522,7 @@ srv_do_purge(
ulint old_activity_count = srv_get_activity_count();
ut_a(n_threads > 0);
+ ut_ad(!srv_read_only_mode);
/* Purge until there are no more records to purge and there is
no change in configuration or server state. If the user has
@@ -2464,7 +2569,7 @@ srv_do_purge(
n_pages_purged = trx_purge(
n_use_threads, srv_purge_batch_size, false);
- if (!(count++ % TRX_SYS_N_RSEGS) || n_pages_purged == 0) {
+ if (!(count++ % TRX_SYS_N_RSEGS)) {
/* Force a truncate of the history list. */
trx_purge(1, srv_purge_batch_size, true);
}
@@ -2487,14 +2592,9 @@ srv_purge_coordinator_suspend(
ulint rseg_history_len) /*!< in: history list length
before last purge */
{
+ ut_ad(!srv_read_only_mode);
ut_a(slot->type == SRV_PURGE);
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = false;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
bool stop = false;
/** Maximum wait time on the purge event, in micro-seconds. */
@@ -2504,6 +2604,12 @@ srv_purge_coordinator_suspend(
ulint ret;
ib_int64_t sig_count = srv_suspend_thread(slot);
+ rw_lock_x_lock(&purge_sys->latch);
+
+ purge_sys->running = false;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
/* We don't wait right away on the the non-timed wait because
we want to signal the thread that wants to suspend purge. */
@@ -2514,8 +2620,8 @@ srv_purge_coordinator_suspend(
ret = os_event_wait_time_low(
slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
} else {
- /* We don't want to waste time waiting if the
- history list has increased by the time we get here
+ /* We don't want to waste time waiting, if the
+ history list increased by the time we got here,
unless purge has been stopped. */
ret = 0;
}
@@ -2582,6 +2688,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
srv_slot_t* slot;
ulint n_total_purged = ULINT_UNDEFINED;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_n_purge_threads >= 1);
ut_a(trx_purge_state() == PURGE_STATE_INIT);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
@@ -2689,6 +2796,7 @@ srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr) /*!< in: query thread */
{
+ ut_ad(!srv_read_only_mode);
mutex_enter(&srv_sys->tasks_mutex);
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
@@ -2708,6 +2816,8 @@ srv_get_task_queue_length(void)
{
ulint n_tasks;
+ ut_ad(!srv_read_only_mode);
+
mutex_enter(&srv_sys->tasks_mutex);
n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
@@ -2724,6 +2834,8 @@ void
srv_purge_wakeup(void)
/*==================*/
{
+ ut_ad(!srv_read_only_mode);
+
if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
srv_release_threads(SRV_PURGE, 1);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 9d1600cff23..efe9f094c0d 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -71,6 +71,7 @@ Created 2/16/1996 Heikki Tuuri
# include "buf0rea.h"
# include "dict0boot.h"
# include "dict0load.h"
+# include "dict0stats_bg.h"
# include "que0que.h"
# include "usr0sess.h"
# include "lock0lock.h"
@@ -87,9 +88,9 @@ Created 2/16/1996 Heikki Tuuri
# include "row0row.h"
# include "row0mysql.h"
# include "btr0pcur.h"
-# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-# include "zlib.h" /* for ZLIB_VERSION */
-# include "buf0dblwr.h"
+# include "os0sync.h"
+# include "zlib.h"
+# include "ut0crc32.h"
/** Log sequence number immediately after startup */
UNIV_INTERN lsn_t srv_start_lsn;
@@ -188,6 +189,63 @@ srv_parse_megabytes(
}
/*********************************************************************//**
+Check if a file can be opened in read-write mode.
+@return true if it doesn't exist or can be opened in rw mode. */
+static
+bool
+srv_file_check_mode(
+/*================*/
+ const char* name) /*!< in: filename to check */
+{
+ os_file_stat_t stat;
+
+ memset(&stat, 0x0, sizeof(stat));
+
+ dberr_t err = os_file_get_status(name, &stat, true);
+
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_get_status() failed on '%s'. Can't determine "
+ "file permissions", name);
+
+ return(false);
+
+ } else if (err == DB_SUCCESS) {
+
+		/* Note: stat.rw_perm is only valid for files */
+
+ if (stat.type == OS_FILE_TYPE_FILE) {
+ if (!stat.rw_perm) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s can't be opened in %s mode",
+ srv_read_only_mode
+ ? "read-write" : "read",
+ name);
+
+ return(false);
+ }
+ } else {
+ /* Not a regular file, bail out. */
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "'%s' not a regular file.", name);
+
+ return(false);
+ }
+ } else {
+
+ /* This is OK. If the file create fails on RO media, there
+ is nothing we can do. */
+
+ ut_a(err == DB_NOT_FOUND);
+ }
+
+ return(true);
+}
+
+/*********************************************************************//**
Reads the data files and their sizes from a character string given in
the .cnf file.
@return TRUE if ok, FALSE on parse error */
@@ -376,79 +434,6 @@ srv_parse_data_file_paths_and_sizes(
}
/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str) /*!< in/out: character string */
-{
- char* input_str;
- char* path;
- ulint i = 0;
-
- srv_log_group_home_dirs = NULL;
-
- input_str = str;
-
- /* First calculate the number of directories and check syntax:
- path;path;... */
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i != 1) {
- /* If innodb_log_group_home_dir was defined it must
- contain exactly one path definition under current MySQL */
-
- return(FALSE);
- }
-
- srv_log_group_home_dirs = static_cast<char**>(
- malloc(i * sizeof *srv_log_group_home_dirs));
-
- /* Then store the actual values to our array */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- if (*str == ';') {
- *str = '\0';
- str++;
- }
-
- srv_log_group_home_dirs[i] = path;
-
- i++;
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
and srv_parse_log_group_home_dirs(). */
UNIV_INTERN
@@ -462,8 +447,6 @@ srv_free_paths_and_sizes(void)
srv_data_file_sizes = NULL;
free(srv_data_file_is_raw_partition);
srv_data_file_is_raw_partition = NULL;
- free(srv_log_group_home_dirs);
- srv_log_group_home_dirs = NULL;
}
#ifndef UNIV_HOTBACKUP
@@ -526,175 +509,230 @@ srv_normalize_path_for_win(
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
-Creates or opens the log files and closes them.
+Creates a log file.
@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_log_file(
-/*====================*/
- ibool create_new_db, /*!< in: TRUE if we should create a
- new database */
- ibool* log_file_created, /*!< out: TRUE if new log file
- created */
- ibool log_file_has_been_opened,/*!< in: TRUE if a log file has been
- opened before: then it is an error
- to try to create another log file */
- ulint k, /*!< in: log group number */
- ulint i) /*!< in: log file number in group */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+create_log_file(
+/*============*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name) /*!< in: log file name */
{
ibool ret;
- os_offset_t size;
- char name[10000];
- ulint dirnamelen;
- UT_NOT_USED(create_new_db);
+ *file = os_file_create(
+ innodb_file_log_key, name,
+ OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret);
- *log_file_created = FALSE;
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting log file %s size to %lu MB",
+ name, (ulong) srv_log_file_size
+ >> (20 - UNIV_PAGE_SIZE_SHIFT));
- srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
+ ret = os_file_set_size(name, *file,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Error in creating %s", name);
+ return(DB_ERROR);
+ }
- dirnamelen = strlen(srv_log_group_home_dirs[k]);
- ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- memcpy(name, srv_log_group_home_dirs[k], dirnamelen);
+ ret = os_file_close(*file);
+ ut_a(ret);
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ return(DB_SUCCESS);
+}
+
+/** Initial number of the first redo log file */
+#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
+
+#ifdef DBUG_OFF
+# define RECOVERY_CRASH(x) do {} while(0)
+#else
+# define RECOVERY_CRASH(x) do { \
+ if (srv_force_recovery_crash == x) { \
+ fprintf(stderr, "innodb_force_recovery_crash=%lu\n", \
+ srv_force_recovery_crash); \
+ fflush(stderr); \
+ exit(3); \
+ } \
+} while (0)
+#endif
+
+/*********************************************************************//**
+Creates all log files.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+create_log_files(
+/*=============*/
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char*& logfile0) /*!< out: name of the first log file */
+{
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create log files in read-only mode");
+ return(DB_READ_ONLY);
}
- sprintf(name + dirnamelen, "%s%lu", "ib_logfile", (ulong) i);
+ /* Remove any old log files. */
+ for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
- if (ret == FALSE) {
- if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have errno set
- to 0 here, which causes our function to return 100;
- work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
+ /* Ignore errors about non-existent files or files
+ that cannot be removed. The create_log_file() will
+ return an error when the file exists. */
+#ifdef __WIN__
+ DeleteFile((LPCTSTR) logfilename);
+#else
+ unlink(logfilename);
#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n", name);
+ /* Crashing after deleting the first
+ file should be recoverable. The buffer
+ pool was clean, and we can simply create
+ all log files from the scratch. */
+ RECOVERY_CRASH(6);
+ }
- return(DB_ERROR);
- }
+ ut_ad(!buf_pool_check_no_pending_io());
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
+ RECOVERY_CRASH(7);
- return(DB_ERROR);
+ for (unsigned i = 0; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i ? i : INIT_LOG_FILE0);
+
+ dberr_t err = create_log_file(&files[i], logfilename);
+
+ if (err != DB_SUCCESS) {
+ return(err);
}
+ }
- size = os_file_get_size(files[i]);
- ut_a(size != (os_offset_t) -1);
+ RECOVERY_CRASH(8);
- if (UNIV_UNLIKELY(size != (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT)) {
+ /* We did not create the first log file initially as
+ ib_logfile0, so that crash recovery cannot find it until it
+ has been completed and renamed. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
- fprintf(stderr,
- "InnoDB: Error: log file %s is"
- " of different size "UINT64PF" bytes\n"
- "InnoDB: than specified in the .cnf"
- " file "UINT64PF" bytes!\n",
- name, size,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
+ fil_space_create(
+ logfilename, SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
+ ut_a(fil_validate());
- return(DB_ERROR);
+ logfile0 = fil_node_create(
+ logfilename, (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE);
+ ut_a(logfile0);
+
+ for (unsigned i = 1; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+
+ if (!fil_node_create(
+ logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ ut_error;
}
- } else {
- *log_file_created = TRUE;
+ }
- ut_print_timestamp(stderr);
+ log_group_init(0, srv_n_log_files,
+ srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
- fprintf(stderr,
- " InnoDB: Log file %s did not exist:"
- " new to be created\n",
- name);
- if (log_file_has_been_opened) {
+ fil_open_log_and_system_tablespace_files();
- return(DB_ERROR);
- }
+ /* Create a log checkpoint. */
+ mutex_enter(&log_sys->mutex);
+ ut_d(recv_no_log_write = FALSE);
+ recv_reset_logs(lsn);
+ mutex_exit(&log_sys->mutex);
- fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ return(DB_SUCCESS);
+}
- fprintf(stderr,
- "InnoDB: Database physically writes the file"
- " full: wait...\n");
+/*********************************************************************//**
+Renames the first log file. */
+static
+void
+create_log_files_rename(
+/*====================*/
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char* logfile0) /*!< in/out: name of the first log file */
+{
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ /* Close the log files, so that we can rename
+ the first one. */
+ fil_close_log_files(false);
- ret = os_file_set_size(name, files[i],
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n",
- name);
+ /* Rename the first log file, now that a log
+ checkpoint has been created. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- return(DB_ERROR);
- }
- }
+ RECOVERY_CRASH(9);
- ret = os_file_close(files[i]);
- ut_a(ret);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Renaming log file %s to %s", logfile0, logfilename);
- if (i == 0) {
- /* Create in memory the file space object
- which is for this log group */
+ mutex_enter(&log_sys->mutex);
+ ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
+ ibool success = os_file_rename(
+ innodb_file_log_key, logfile0, logfilename);
+ ut_a(success);
- fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
- FIL_LOG);
- }
+ RECOVERY_CRASH(10);
- ut_a(fil_validate());
+ /* Replace the first file with ib_logfile0. */
+ strcpy(logfile0, logfilename);
+ mutex_exit(&log_sys->mutex);
- /* srv_log_file_size is measured in pages; if page size is 16KB,
- then we have a limit of 64TB on 32 bit systems */
- ut_a(srv_log_file_size <= ULINT_MAX);
+ fil_open_log_and_system_tablespace_files();
- fil_node_create(name, (ulint) srv_log_file_size,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
- /* If this is the first log group, create the file space object
- for archived logs.
- Under MySQL, no archiving ever done. */
+ ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
+}
- if (k == 0 && i == 0) {
- arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
+/*********************************************************************//**
+Opens a log file.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+open_log_file(
+/*==========*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name, /*!< in: log file name */
+ os_offset_t* size) /*!< out: file size */
+{
+ ibool ret;
- fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
- } else {
- arch_space_id = ULINT_UNDEFINED;
- }
-#endif /* UNIV_LOG_ARCHIVE */
- if (i == 0) {
- log_group_init(k, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
- space id */
+ *file = os_file_create(innodb_file_log_key, name,
+ OS_FILE_OPEN, OS_FILE_AIO,
+ OS_LOG_FILE, &ret);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
+ return(DB_ERROR);
}
+ *size = os_file_get_size(*file);
+
+ ret = os_file_close(*file);
+ ut_a(ret);
return(DB_SUCCESS);
}
/*********************************************************************//**
Creates or opens database data files and closes them.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
open_or_create_data_files(
/*======================*/
ibool* create_new_db, /*!< out: TRUE if new database should be
@@ -718,13 +756,16 @@ open_or_create_data_files(
ibool one_created = FALSE;
os_offset_t size;
ulint flags;
+ ulint space;
ulint rounded_size_pages;
char name[10000];
if (srv_n_data_files >= 1000) {
- fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
- "InnoDB: you have defined %lu\n",
- (ulong) srv_n_data_files);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can only have < 1000 data files, you have "
+ "defined %lu", (ulong) srv_n_data_files);
+
return(DB_ERROR);
}
@@ -742,7 +783,9 @@ open_or_create_data_files(
ut_a(dirnamelen + strlen(srv_data_file_names[i])
< (sizeof name) - 1);
+
memcpy(name, srv_data_home, dirnamelen);
+
/* Add a path separator if needed. */
if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
name[dirnamelen++] = SRV_PATH_SEPARATOR;
@@ -750,46 +793,67 @@ open_or_create_data_files(
strcpy(name + dirnamelen, srv_data_file_names[i]);
- if (srv_data_file_is_raw_partition[i] == 0) {
+		/* Note: It will return true if the file doesn't exist. */
+
+ if (!srv_file_check_mode(name)) {
+
+ return(DB_FAIL);
+
+ } else if (srv_data_file_is_raw_partition[i] == 0) {
/* First we try to create the file: if it already
exists, ret will get value FALSE */
- files[i] = os_file_create(innodb_file_data_key,
- name, OS_FILE_CREATE,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
+ if (srv_read_only_mode) {
+
+ if (ret) {
+ goto size_check;
+ }
- if (ret == FALSE && os_file_get_last_error(FALSE)
- != OS_FILE_ALREADY_EXISTS
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Opening %s failed!", name);
+
+ return(DB_ERROR);
+
+ } else if (!ret
+ && os_file_get_last_error(false)
+ != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our
+ function to return 100; work around that
+ AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creating or opening %s failed!",
name);
return(DB_ERROR);
}
+
} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
+
+ ut_a(!srv_read_only_mode);
+
/* The partition is opened, not created; then it is
written over */
srv_start_raw_disk_in_use = TRUE;
srv_created_new_raw = TRUE;
- files[i] = os_file_create(innodb_file_data_key,
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_OPEN_RAW,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in opening %s", name);
return(DB_ERROR);
}
@@ -805,17 +869,15 @@ open_or_create_data_files(
/* We open the data file */
if (one_created) {
- fprintf(stderr,
- "InnoDB: Error: data files can only"
- " be added at the end\n");
- fprintf(stderr,
- "InnoDB: of a tablespace, but"
- " data file %s existed beforehand.\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data files can only be added at "
+ "the end of a tablespace, but "
+ "data file %s existed beforehand.",
name);
return(DB_ERROR);
}
-
if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+ ut_a(!srv_read_only_mode);
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RAW,
@@ -833,9 +895,11 @@ open_or_create_data_files(
}
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
- os_file_get_last_error(TRUE);
+
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't open '%s'", name);
return(DB_ERROR);
}
@@ -845,6 +909,7 @@ open_or_create_data_files(
goto skip_size_check;
}
+size_check:
size = os_file_get_size(files[i]);
ut_a(size != (os_offset_t) -1);
/* Round size downward to megabytes */
@@ -860,16 +925,16 @@ open_or_create_data_files(
&& srv_last_file_size_max
< rounded_size_pages)) {
- fprintf(stderr,
- "InnoDB: Error: auto-extending"
- " data file %s is"
- " of a different size\n"
- "InnoDB: %lu pages (rounded"
- " down to MB) than specified"
- " in the .cnf file:\n"
- "InnoDB: initial %lu pages,"
- " max %lu (relevant if"
- " non-zero) pages!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "auto-extending "
+ "data file %s is "
+ "of a different size "
+ "%lu pages (rounded "
+ "down to MB) than specified "
+ "in the .cnf file: "
+ "initial %lu pages, "
+ "max %lu (relevant if "
+ "non-zero) pages!",
name,
(ulong) rounded_size_pages,
(ulong) srv_data_file_sizes[i],
@@ -884,13 +949,11 @@ open_or_create_data_files(
if (rounded_size_pages != srv_data_file_sizes[i]) {
- fprintf(stderr,
- "InnoDB: Error: data file %s"
- " is of a different size\n"
- "InnoDB: %lu pages"
- " (rounded down to MB)\n"
- "InnoDB: than specified"
- " in the .cnf file %lu pages!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file %s is of a different "
+ "size %lu pages (rounded down to MB) "
+ "than specified in the .cnf file "
+ "%lu pages!",
name,
(ulong) rounded_size_pages,
(ulong) srv_data_file_sizes[i]);
@@ -899,63 +962,65 @@ open_or_create_data_files(
}
skip_size_check:
fil_read_first_page(
- files[i], one_opened, &flags,
+ files[i], one_opened, &flags, &space,
#ifdef UNIV_LOG_ARCHIVE
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
min_flushed_lsn, max_flushed_lsn);
+ /* The first file of the system tablespace must
+ have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
+ field in files greater than ibdata1 are unreliable. */
+ ut_a(one_opened || space == TRX_SYS_SPACE);
+
+ /* Check the flags for the first system tablespace
+ file only. */
if (!one_opened
&& UNIV_PAGE_SIZE
!= fsp_flags_get_page_size(flags)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: data file %s"
- " uses page size %lu,\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file \"%s\" uses page size %lu,"
+ "but the start-up parameter "
+ "is --innodb-page-size=%lu",
name,
- fsp_flags_get_page_size(flags));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: but the start-up parameter"
- " is innodb-page-size=%lu\n",
+ fsp_flags_get_page_size(flags),
UNIV_PAGE_SIZE);
return(DB_ERROR);
}
one_opened = TRUE;
- } else {
+ } else if (!srv_read_only_mode) {
/* We created the data file and now write it full of
zeros */
one_created = TRUE;
if (i > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Data file %s did not"
- " exist: new to be created\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not"
+ " exist: new to be created",
name);
} else {
- fprintf(stderr,
- "InnoDB: The first specified"
- " data file %s did not exist:\n"
- "InnoDB: a new database"
- " to be created!\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The first specified "
+ "data file %s did not exist: "
+ "a new database to be created!",
+ name);
+
*create_new_db = TRUE;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Setting file %s size to %lu MB\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
name,
(ulong) (srv_data_file_sizes[i]
>> (20 - UNIV_PAGE_SIZE_SHIFT)));
- fprintf(stderr,
- "InnoDB: Database physically writes the"
- " file full: wait...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the"
+ " file full: wait...");
ret = os_file_set_size(
name, files[i],
@@ -963,9 +1028,10 @@ skip_size_check:
<< UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in creating %s: "
+ "probably out of disk space",
+ name);
return(DB_ERROR);
}
@@ -983,8 +1049,10 @@ skip_size_check:
ut_a(fil_validate());
- fil_node_create(name, srv_data_file_sizes[i], 0,
- srv_data_file_is_raw_partition[i] != 0);
+ if (!fil_node_create(name, srv_data_file_sizes[i], 0,
+ srv_data_file_is_raw_partition[i] != 0)) {
+ return(DB_ERROR);
+ }
}
return(DB_SUCCESS);
@@ -994,7 +1062,7 @@ skip_size_check:
Create undo tablespace.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespace_create(
/*=======================*/
const char* name, /*!< in: tablespace name */
@@ -1002,48 +1070,55 @@ srv_undo_tablespace_create(
{
os_file_t fh;
ibool ret;
- enum db_err err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
os_file_create_subdirs_if_needed(name);
fh = os_file_create(
- innodb_file_data_key, name, OS_FILE_CREATE,
+ innodb_file_data_key,
+ name,
+ srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- if (ret == FALSE
- && os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
+ if (srv_read_only_mode && ret) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s opened in read-only mode", name);
+ } else if (ret == FALSE
+ && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our function
+ to return 100; work around that AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
) {
- fprintf(stderr, "InnoDB: Error in creating %s\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't create UNDO tablespace %s", name);
err = DB_ERROR;
} else {
+ ut_a(!srv_read_only_mode);
+
/* We created the data file and now write it full of zeros */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Data file %s did not"
- " exist: new to be created\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not exist: new to be created",
+ name);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Setting file %s size to %lu MB\n",
- name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
+ name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Database physically writes the"
- " file full: wait...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the file full: wait...");
ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Error in creating %s: probably out of "
+ "disk space", name);
err = DB_ERROR;
}
@@ -1058,17 +1133,25 @@ srv_undo_tablespace_create(
Open an undo tablespace.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespace_open(
/*=====================*/
const char* name, /*!< in: tablespace name */
ulint space) /*!< in: tablespace id */
{
os_file_t fh;
- enum db_err err;
+ dberr_t err = DB_ERROR;
ibool ret;
ulint flags;
+ if (!srv_file_check_mode(name)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "UNDO tablespaces must be %s!",
+ srv_read_only_mode ? "writable" : "readable");
+
+ return(DB_ERROR);
+ }
+
fh = os_file_create(
innodb_file_data_key, name,
OS_FILE_OPEN_RETRY
@@ -1082,7 +1165,6 @@ srv_undo_tablespace_open(
if (ret) {
os_offset_t size;
- os_offset_t n_pages;
size = os_file_get_size(fh);
ut_a(size != (os_offset_t) -1);
@@ -1105,17 +1187,15 @@ srv_undo_tablespace_open(
ut_a(fil_validate());
- n_pages = size / UNIV_PAGE_SIZE;
+ os_offset_t n_pages = size / UNIV_PAGE_SIZE;
/* On 64 bit Windows ulint can be 32 bit and os_offset_t
is 64 bit. It is OK to cast the n_pages to ulint because
the unit has been scaled to pages and they are always
32 bit. */
- fil_node_create(name, (ulint) n_pages, space, FALSE);
-
- err = DB_SUCCESS;
- } else {
- err = DB_ERROR;
+ if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
+ err = DB_SUCCESS;
+ }
}
return(err);
@@ -1125,20 +1205,25 @@ srv_undo_tablespace_open(
Opens the configured number of undo tablespaces.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespaces_init(
/*======================*/
ibool create_new_db, /*!< in: TRUE if new db being
created */
- const ulint n_conf_tablespaces) /*!< in: configured undo
+ const ulint n_conf_tablespaces, /*!< in: configured undo
tablespaces */
+ ulint* n_opened) /*!< out: number of UNDO
+ tablespaces successfully
+ discovered and opened */
{
ulint i;
- enum db_err err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ulint prev_space_id = 0;
ulint n_undo_tablespaces;
ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+ *n_opened = 0;
+
ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
@@ -1164,10 +1249,10 @@ srv_undo_tablespaces_init(
name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Could not create "
- "undo tablespace '%s'.\n", name);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not create undo tablespace '%s'.",
+ name);
return(err);
}
@@ -1217,15 +1302,16 @@ srv_undo_tablespaces_init(
err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error opening undo "
- "tablespace %s.\n", name);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to open undo tablespace '%s'.", name);
return(err);
}
prev_space_id = undo_tablespace_ids[i];
+
+ ++*n_opened;
}
/* Open any extra unused undo tablespaces. These must be contiguous.
@@ -1248,6 +1334,8 @@ srv_undo_tablespaces_init(
}
++n_undo_tablespaces;
+
+ ++*n_opened;
}
/* If the user says that there are fewer than what we find we
@@ -1275,13 +1363,17 @@ srv_undo_tablespaces_init(
"value is %lu\n", n_undo_tablespaces);
return(err != DB_SUCCESS ? err : DB_ERROR);
- }
- if (n_undo_tablespaces > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Opened %lu undo tablespaces\n",
- n_conf_tablespaces);
+ } else if (n_undo_tablespaces > 0) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
+ n_undo_tablespaces);
+
+ if (n_conf_tablespaces == 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Using the system tablespace for all UNDO "
+ "logging because innodb_undo_tablespaces=0");
+ }
}
if (create_new_db) {
@@ -1303,18 +1395,51 @@ srv_undo_tablespaces_init(
}
/********************************************************************
+Wait for the purge thread(s) to start up. */
+static
+void
+srv_start_wait_for_purge_to_start()
+/*===============================*/
+{
+ /* Wait for the purge coordinator and master thread to startup. */
+
+ purge_state_t state = trx_purge_state();
+
+ ut_a(state != PURGE_STATE_DISABLED);
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
+ && state == PURGE_STATE_INIT) {
+
+ switch (state = trx_purge_state()) {
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ break;
+
+ case PURGE_STATE_INIT:
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for purge to start");
+
+ os_thread_sleep(50000);
+ break;
+
+ case PURGE_STATE_EXIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+ }
+ }
+}
+
+/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_start_or_create_for_mysql(void)
/*====================================*/
{
ibool create_new_db;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
lsn_t min_flushed_lsn;
lsn_t max_flushed_lsn;
#ifdef UNIV_LOG_ARCHIVE
@@ -1324,11 +1449,19 @@ innobase_start_or_create_for_mysql(void)
ulint sum_of_new_sizes;
ulint sum_of_data_file_sizes;
ulint tablespace_size_in_header;
- ulint err;
- ulint i;
+ dberr_t err;
+ unsigned i;
+ ulint srv_n_log_files_found = srv_n_log_files;
ulint io_limit;
mtr_t mtr;
ib_bh_t* ib_bh;
+ char logfilename[10000];
+ char* logfile0 = NULL;
+ size_t dirnamelen;
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
+ }
#ifdef HAVE_DARWIN_THREADS
# ifdef F_FULLFSYNC
@@ -1422,31 +1555,34 @@ innobase_start_or_create_for_mysql(void)
" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
#endif
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: The InnoDB memory heap is disabled\n");
+ if (srv_use_sys_malloc) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The InnoDB memory heap is disabled");
}
#if defined(COMPILER_HINTS_ENABLED)
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Compiler hints enabled.\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " InnoDB: Compiler hints enabled.");
#endif /* defined(COMPILER_HINTS_ENABLED) */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: " IB_ATOMICS_STARTUP_MSG "\n", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "" IB_ATOMICS_STARTUP_MSG "");
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Compressed tables use zlib " ZLIB_VERSION
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Compressed tables use zlib " ZLIB_VERSION
#ifdef UNIV_ZIP_DEBUG
" with validation"
#endif /* UNIV_ZIP_DEBUG */
- "\n" , stderr);
+ );
#ifdef UNIV_ZIP_COPY
- ut_print_timestamp(stderr);
- fputs(" InnoDB: and extra copying\n", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
#endif /* UNIV_ZIP_COPY */
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "CPU %s crc32 instructions",
+ ut_crc32_sse2_enabled ? "supports" : "does not support");
+
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
@@ -1505,17 +1641,14 @@ innobase_start_or_create_for_mysql(void)
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Using Linux native AIO\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
}
#else
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
srv_use_native_aio = FALSE;
-
-#endif
+#endif /* __WIN__ */
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
@@ -1533,6 +1666,9 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
+
} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
@@ -1550,12 +1686,10 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#endif
+#endif /* __WIN__ */
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unrecognized value %s for"
- " innodb_flush_method\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unrecognized value %s for innodb_flush_method",
srv_file_flush_method_str);
return(DB_ERROR);
}
@@ -1580,74 +1714,93 @@ innobase_start_or_create_for_mysql(void)
srv_max_n_threads = 10000;
} else {
srv_buf_pool_instances = 1;
- srv_max_n_threads = 1000; /* saves several MB of memory,
- especially in 64-bit
- computers */
+
+ /* Saves several MB of memory, especially in
+ 64-bit computers */
+
+ srv_max_n_threads = 1000;
}
- err = srv_boot();
+ srv_boot();
- if (err != DB_SUCCESS) {
+ if (!srv_read_only_mode) {
- return((int) err);
- }
+ mutex_create(srv_monitor_file_mutex_key,
+ &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
- mutex_create(srv_monitor_file_mutex_key,
- &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
+ if (srv_innodb_status) {
- if (srv_innodb_status) {
+ srv_monitor_file_name = static_cast<char*>(
+ mem_alloc(
+ strlen(fil_path_to_mysql_datadir)
+ + 20 + sizeof "/innodb_status."));
- srv_monitor_file_name = static_cast<char*>(
- mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status."));
+ sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
+ fil_path_to_mysql_datadir,
+ os_proc_get_number());
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir, os_proc_get_number());
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
- if (!srv_monitor_file) {
- fprintf(stderr, "InnoDB: unable to create %s: %s\n",
- srv_monitor_file_name, strerror(errno));
- return(DB_ERROR);
+ srv_monitor_file = fopen(srv_monitor_file_name, "w+");
+
+ if (!srv_monitor_file) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create %s: %s",
+ srv_monitor_file_name,
+ strerror(errno));
+
+ return(DB_ERROR);
+ }
+ } else {
+ srv_monitor_file_name = NULL;
+ srv_monitor_file = os_file_create_tmpfile();
+
+ if (!srv_monitor_file) {
+ return(DB_ERROR);
+ }
}
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile();
- if (!srv_monitor_file) {
+
+ mutex_create(srv_dict_tmpfile_mutex_key,
+ &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
+
+ srv_dict_tmpfile = os_file_create_tmpfile();
+
+ if (!srv_dict_tmpfile) {
return(DB_ERROR);
}
- }
- mutex_create(srv_dict_tmpfile_mutex_key,
- &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
+ mutex_create(srv_misc_tmpfile_mutex_key,
+ &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
- srv_dict_tmpfile = os_file_create_tmpfile();
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
+ srv_misc_tmpfile = os_file_create_tmpfile();
- mutex_create(srv_misc_tmpfile_mutex_key,
- &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile();
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
+ if (!srv_misc_tmpfile) {
+ return(DB_ERROR);
+ }
}
/* If user has set the value of innodb_file_io_threads then
we'll emit a message telling the user that this parameter
is now deprecated. */
if (srv_n_file_io_threads != 4) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning:"
- " innodb_file_io_threads is deprecated."
- " Please use innodb_read_io_threads and"
- " innodb_write_io_threads instead\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "innodb_file_io_threads is deprecated. Please use "
+ "innodb_read_io_threads and innodb_write_io_threads "
+ "instead");
}
/* Now overwrite the value on srv_n_file_io_threads */
- srv_n_file_io_threads = 2 + srv_n_read_io_threads
- + srv_n_write_io_threads;
+ srv_n_file_io_threads = srv_n_read_io_threads;
+
+ if (!srv_read_only_mode) {
+ /* Add the log and ibuf IO threads. */
+ srv_n_file_io_threads += 2;
+ srv_n_file_io_threads += srv_n_write_io_threads;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Disabling background IO write threads.");
+
+ srv_n_write_io_threads = 0;
+ }
ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
@@ -1662,56 +1815,59 @@ innobase_start_or_create_for_mysql(void)
}
# endif /* __WIN__ */
- os_aio_init(io_limit,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
+ if (!os_aio_init(io_limit,
+ srv_n_read_io_threads,
+ srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Fatal : Cannot initialize AIO sub-system");
+
+ return(DB_ERROR);
+ }
fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
- /* Print time to initialize the buffer pool */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Initializing buffer pool, size =");
+ double size;
+ char unit;
if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
- fprintf(stderr,
- " %.1fG\n",
- ((double) srv_buf_pool_size) / (1024 * 1024 * 1024));
+ size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
+ unit = 'G';
} else {
- fprintf(stderr,
- " %.1fM\n",
- ((double) srv_buf_pool_size) / (1024 * 1024));
+ size = ((double) srv_buf_pool_size) / (1024 * 1024);
+ unit = 'M';
}
- err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
+ /* Print time to initialize the buffer pool */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Initializing buffer pool, size = %.1f%c", size, unit);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Completed initialization of buffer pool\n");
+ err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Fatal error: cannot allocate memory"
- " for the buffer pool\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate memory for the buffer pool");
return(DB_ERROR);
}
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Completed initialization of buffer pool");
+
#ifdef UNIV_DEBUG
/* We have observed deadlocks with a 5MB buffer pool but
the actual lower limit could very well be a little higher. */
if (srv_buf_pool_size <= 5 * 1024 * 1024) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: Small buffer pool size "
- "(%luM), the flst_validate() debug function "
- "can cause a deadlock if the buffer pool fills up.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Small buffer pool size (%luM), the flst_validate() "
+ "debug function can cause a deadlock if the "
+ "buffer pool fills up.",
srv_buf_pool_size / 1024 / 1024);
}
-#endif
+#endif /* UNIV_DEBUG */
fsp_init();
log_init();
@@ -1720,14 +1876,15 @@ innobase_start_or_create_for_mysql(void)
/* Create i/o-handler threads: */
- for (i = 0; i < srv_n_file_io_threads; i++) {
+ for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
+
n[i] = i;
os_thread_create(io_handler_thread, n + i, thread_ids + i);
}
#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
+ if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
ut_print_timestamp(stderr);
@@ -1738,16 +1895,14 @@ innobase_start_or_create_for_mysql(void)
#endif /* UNIV_LOG_ARCHIVE */
if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
- >= 549755813888ULL /* 512G */) {
+ >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
/* log_block_convert_lsn_to_no() limits the returned block
number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
bytes, then we have a limit of 512 GB. If that limit is to
be raised, then log_block_convert_lsn_to_no() must be
modified. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: combined size of log files"
- " must be < 512 GB\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Combined size of log files must be < 512 GB");
return(DB_ERROR);
}
@@ -1759,7 +1914,6 @@ innobase_start_or_create_for_mysql(void)
So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
is 64 TB on 32 bit systems. */
- ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: combined size of log files"
" must be < %lu GB\n",
@@ -1791,10 +1945,8 @@ innobase_start_or_create_for_mysql(void)
}
if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: tablespace size must be"
- " at least 10 MB\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace size must be at least 10 MB");
return(DB_ERROR);
}
@@ -1805,36 +1957,27 @@ innobase_start_or_create_for_mysql(void)
#endif /* UNIV_LOG_ARCHIVE */
&min_flushed_lsn, &max_flushed_lsn,
&sum_of_new_sizes);
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Could not open or create data files.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you tried to add new data files,"
- " and it failed here,\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: you should now edit innodb_data_file_path"
- " in my.cnf back\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: to what it was, and remove the"
- " new ibdata files InnoDB created\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: in this failed attempt. InnoDB only wrote"
- " those files full of\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: zeros, but did not yet use them in any way."
- " But be careful: do not\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: remove old data files"
- " which contain your precious data!\n");
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The system tablespace must be writable!");
+
+ return(DB_ERROR);
- return((int) err);
+ } else if (err != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not open or create the system tablespace. If "
+ "you tried to add new data files to the system "
+ "tablespace, and it failed here, you should now "
+ "edit innodb_data_file_path in my.cnf back to what "
+ "it was, and remove the new ibdata files InnoDB "
+ "created in this failed attempt. InnoDB only wrote "
+ "those files full of zeros, but did not yet use "
+ "them in any way. But be careful: do not remove "
+ "old data files which contain your precious data!");
+
+ return(err);
}
#ifdef UNIV_LOG_ARCHIVE
@@ -1842,125 +1985,199 @@ innobase_start_or_create_for_mysql(void)
srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
#endif /* UNIV_LOG_ARCHIVE */
- for (i = 0; i < srv_n_log_files; i++) {
- err = open_or_create_log_file(create_new_db, &log_file_created,
- log_opened, 0, i);
- if (err != DB_SUCCESS) {
+ dirnamelen = strlen(srv_log_group_home_dir);
+ ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
+ memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
- return((int) err);
- }
+ /* Add a path separator if needed. */
+ if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
- if (log_file_created) {
- log_created = TRUE;
- } else {
- log_opened = TRUE;
+ srv_log_file_size_requested = srv_log_file_size;
+
+ if (create_new_db) {
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ err = create_log_files(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
}
- if ((log_opened && create_new_db)
- || (log_opened && log_created)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: all log files must be"
- " created at the same time.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: All log files must be"
- " created also in database creation.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you want bigger or smaller"
- " log files, shut down the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: database and make sure there"
- " were no errors in shutdown.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Then delete the existing log files."
- " Edit the .cnf file\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and start the database again.\n");
+ } else {
+ for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
+ os_offset_t size;
+ os_file_stat_t stat_info;
+
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i);
+
+ err = os_file_get_status(
+ logfilename, &stat_info, false);
+
+ if (err == DB_NOT_FOUND) {
+ if (i == 0) {
+ if (max_flushed_lsn
+ != min_flushed_lsn) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or"
+ " not in sync"
+ " with each other");
+ return(DB_ERROR);
+ }
+
+ if (max_flushed_lsn < (lsn_t) 1000) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or the"
+ " database was not"
+ " shut down cleanly"
+ " after creating"
+ " the data files.");
+ return(DB_ERROR);
+ }
+
+ err = create_log_files(
+ logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ create_log_files_rename(
+ logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ /* Suppress the message about
+ crash recovery. */
+ max_flushed_lsn = min_flushed_lsn
+ = log_get_lsn();
+ goto files_checked;
+ } else if (i < 2) {
+ /* must have at least 2 log files */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Only one log file found.");
+ return(err);
+ }
- return(DB_ERROR);
+ /* opened all files */
+ break;
+ }
+
+ if (!srv_file_check_mode(logfilename)) {
+ return(DB_ERROR);
+ }
+
+ err = open_log_file(&files[i], logfilename, &size);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ut_a(size != (os_offset_t) -1);
+
+ if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s size "
+ UINT64PF " is not a multiple of"
+ " innodb_page_size",
+ logfilename, size);
+ return(DB_ERROR);
+ }
+
+ size >>= UNIV_PAGE_SIZE_SHIFT;
+
+ if (i == 0) {
+ srv_log_file_size = size;
+ } else if (size != srv_log_file_size) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s is"
+ " of different size "UINT64PF" bytes"
+ " than other log"
+ " files "UINT64PF" bytes!",
+ logfilename,
+ size << UNIV_PAGE_SIZE_SHIFT,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ return(DB_ERROR);
+ }
}
- }
- /* Open all log files and data files in the system tablespace: we
- keep them open until database shutdown */
+ srv_n_log_files_found = i;
- fil_open_log_and_system_tablespace_files();
+ /* Create the in-memory file space objects. */
- err = srv_undo_tablespaces_init(create_new_db, srv_undo_tablespaces);
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- /* If the force recovery is set very high then we carry on regardless
- of all errors. Basically this is fingers crossed mode. */
+ fil_space_create(logfilename,
+ SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
- if (err != DB_SUCCESS
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ ut_a(fil_validate());
- return((int) err);
- }
+ /* srv_log_file_size is measured in pages; if page size is 16KB,
+ then we have a limit of 64TB on 32 bit systems */
+ ut_a(srv_log_file_size <= ULINT_MAX);
+
+ for (unsigned j = 0; j < i; j++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
+
+ if (!fil_node_create(logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ return(DB_ERROR);
+ }
+ }
- if (log_created && !create_new_db
-#ifdef UNIV_LOG_ARCHIVE
- && !srv_archive_recovery
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- if (max_flushed_lsn != min_flushed_lsn
#ifdef UNIV_LOG_ARCHIVE
- || max_arch_log_no != min_arch_log_no
+ /* Create the file space object for archived logs. Under
+ MySQL, no archiving ever done. */
+ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
+ 0, FIL_LOG);
#endif /* UNIV_LOG_ARCHIVE */
- ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files were not in sync"
- " with each other\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: or the data files are corrupt.\n");
+ log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
+ }
- return(DB_ERROR);
- }
+files_checked:
+ /* Open all log files and data files in the system
+ tablespace: we keep them open until database
+ shutdown */
- if (max_flushed_lsn < (lsn_t) 1000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files are corrupt,"
- " or new data files were\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: created when the database"
- " was started previous\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: time but the database"
- " was not shut down\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: normally after that.\n");
+ fil_open_log_and_system_tablespace_files();
- return(DB_ERROR);
- }
+ err = srv_undo_tablespaces_init(
+ create_new_db,
+ srv_undo_tablespaces,
+ &srv_undo_tablespaces_open);
- mutex_enter(&(log_sys->mutex));
+ /* If the force recovery is set very high then we carry on regardless
+ of all errors. Basically this is fingers crossed mode. */
-#ifdef UNIV_LOG_ARCHIVE
- /* Do not + 1 arch_log_no because we do not use log
- archiving */
- recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
-#else
- recv_reset_logs(max_flushed_lsn, TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
+ if (err != DB_SUCCESS
+ && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- mutex_exit(&(log_sys->mutex));
+ return(err);
+ }
+
+ /* Initialize objects used by dict stats gathering thread, which
+ can also be used by recovery if it tries to drop some table */
+ if (!srv_read_only_mode) {
+ dict_stats_thread_init();
}
trx_sys_file_format_init();
@@ -1968,6 +2185,9 @@ innobase_start_or_create_for_mysql(void)
trx_sys_create();
if (create_new_db) {
+
+ ut_a(!srv_read_only_mode);
+
mtr_start(&mtr);
fsp_header_init(0, sum_of_new_sizes, &mtr);
@@ -1987,16 +2207,34 @@ innobase_start_or_create_for_mysql(void)
trx_purge_sys_create(srv_n_purge_threads, ib_bh);
- dict_create();
+ err = dict_create();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
srv_startup_is_before_trx_rollback_phase = FALSE;
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ create_log_files_rename(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
#ifdef UNIV_LOG_ARCHIVE
} else if (srv_archive_recovery) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting archive"
- " recovery from a backup...\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " Starting archive recovery from a backup...");
+
err = recv_recovery_from_archive_start(
min_flushed_lsn, srv_archive_recovery_limit_lsn,
min_arch_log_no);
@@ -2007,7 +2245,11 @@ innobase_start_or_create_for_mysql(void)
/* Since ibuf init is in dict_boot, and ibuf is needed
in any disk i/o, first call dict_boot */
- dict_boot();
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
ib_bh = trx_sys_init_at_db_start();
@@ -2051,10 +2293,10 @@ innobase_start_or_create_for_mysql(void)
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
- err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- IB_ULONGLONG_MAX,
- min_flushed_lsn,
- max_flushed_lsn);
+ err = recv_recovery_from_checkpoint_start(
+ LOG_CHECKPOINT, IB_ULONGLONG_MAX,
+ min_flushed_lsn, max_flushed_lsn);
+
if (err != DB_SUCCESS) {
return(DB_ERROR);
@@ -2066,7 +2308,11 @@ innobase_start_or_create_for_mysql(void)
to access space 0, and the insert buffer at this stage already
works for space 0. */
- dict_boot();
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
ib_bh = trx_sys_init_at_db_start();
@@ -2079,6 +2325,7 @@ innobase_start_or_create_for_mysql(void)
are initialized in trx_sys_init_at_db_start(). */
recv_recovery_from_checkpoint_finish();
+
if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
/* The following call is necessary for the insert
buffer to work with multiple tablespaces. We must
@@ -2100,6 +2347,90 @@ innobase_start_or_create_for_mysql(void)
recv_needed_recovery);
}
+ if (!srv_force_recovery
+ && !recv_sys->found_corrupt_log
+ && (srv_log_file_size_requested != srv_log_file_size
+ || srv_n_log_files_found != srv_n_log_files)) {
+ /* Prepare to replace the redo log files. */
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot resize log files "
+ "in read-only mode.");
+ return(DB_READ_ONLY);
+ }
+
+ /* Clean the buffer pool. */
+ bool success = buf_flush_list(
+ ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ RECOVERY_CRASH(1);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Resizing redo log from %u*%u to %u*%u pages"
+ ", LSN=" LSN_PF,
+ (unsigned) i,
+ (unsigned) srv_log_file_size,
+ (unsigned) srv_n_log_files,
+ (unsigned) srv_log_file_size_requested,
+ max_flushed_lsn);
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ RECOVERY_CRASH(2);
+
+ /* Flush the old log files. */
+ log_buffer_flush_to_disk();
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+
+ ut_ad(max_flushed_lsn == log_get_lsn());
+
+ /* Prohibit redo log writes from any other
+ threads until creating a log checkpoint at the
+ end of create_log_files(). */
+ ut_d(recv_no_log_write = TRUE);
+ ut_ad(!buf_pool_check_no_pending_io());
+
+ RECOVERY_CRASH(3);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(
+ max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ RECOVERY_CRASH(4);
+
+ /* Close and free the redo log files, so that
+ we can replace them. */
+ fil_close_log_files(true);
+
+ RECOVERY_CRASH(5);
+
+ /* Free the old log file space. */
+ log_group_close_all();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Starting to delete and rewrite log files.");
+
+ srv_log_file_size = srv_log_file_size_requested;
+
+ err = create_log_files(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ create_log_files_rename(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+ }
+
srv_startup_is_before_trx_rollback_phase = FALSE;
recv_recovery_rollback_active();
@@ -2181,31 +2512,39 @@ innobase_start_or_create_for_mysql(void)
if (srv_available_undo_logs == ULINT_UNDEFINED) {
/* Can only happen if force recovery is set. */
- ut_a(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ ut_a(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_read_only_mode);
srv_undo_logs = ULONG_UNDEFINED;
}
- /* Create the thread which watches the timeouts for lock waits */
- os_thread_create(
- lock_wait_timeout_thread,
- NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which warns of long semaphore waits */
- os_thread_create(
- srv_error_monitor_thread,
- NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+ if (!srv_read_only_mode) {
+ /* Create the thread which watches the timeouts
+ for lock waits */
+ os_thread_create(
+ lock_wait_timeout_thread,
+ NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
- /* Create the thread which prints InnoDB monitor info */
- os_thread_create(
- srv_monitor_thread,
- NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ /* Create the thread which warns of long semaphore waits */
+ os_thread_create(
+ srv_error_monitor_thread,
+ NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- srv_is_being_started = FALSE;
+ /* Create the thread which prints InnoDB monitor info */
+ os_thread_create(
+ srv_monitor_thread,
+ NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ }
/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
err = dict_create_or_check_foreign_constraint_tables();
if (err != DB_SUCCESS) {
- return((int)DB_ERROR);
+ return(err);
+ }
+
+ /* Create the SYS_TABLESPACES system table */
+ err = dict_create_or_check_sys_tablespace();
+ if (err != DB_SUCCESS) {
+ return(err);
}
srv_is_being_started = FALSE;
@@ -2215,11 +2554,15 @@ innobase_start_or_create_for_mysql(void)
/* Create the master thread which does purge and other utility
operations */
- os_thread_create(
- srv_master_thread,
- NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
+ if (!srv_read_only_mode) {
+
+ os_thread_create(
+ srv_master_thread,
+ NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
+ }
- if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ if (!srv_read_only_mode
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
os_thread_create(
srv_purge_coordinator_thread,
@@ -2234,35 +2577,15 @@ innobase_start_or_create_for_mysql(void)
srv_worker_thread, NULL,
thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
}
- }
-
- os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
-
- /* Wait for the purge coordinator and master thread to startup. */
-
- purge_state_t state = trx_purge_state();
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
- && state == PURGE_STATE_INIT) {
-
- switch (state = trx_purge_state()) {
- case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
- break;
-
- case PURGE_STATE_INIT:
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "Waiting for the background threads to "
- "start\n");
+ srv_start_wait_for_purge_to_start();
- os_thread_sleep(50000);
- break;
+ } else {
+ purge_sys->state = PURGE_STATE_DISABLED;
+ }
- case PURGE_STATE_EXIT:
- ut_error;
- }
+ if (!srv_read_only_mode) {
+ os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
}
#ifdef UNIV_DEBUG
@@ -2276,7 +2599,8 @@ innobase_start_or_create_for_mysql(void)
tablespace_size_in_header = fsp_header_get_tablespace_size();
- if (!srv_auto_extend_last_data_file
+ if (!srv_read_only_mode
+ && !srv_auto_extend_last_data_file
&& sum_of_data_file_sizes != tablespace_size_in_header) {
ut_print_timestamp(stderr);
@@ -2319,7 +2643,8 @@ innobase_start_or_create_for_mysql(void)
}
}
- if (srv_auto_extend_last_data_file
+ if (!srv_read_only_mode
+ && srv_auto_extend_last_data_file
&& sum_of_data_file_sizes < tablespace_size_in_header) {
ut_print_timestamp(stderr);
@@ -2383,23 +2708,17 @@ innobase_start_or_create_for_mysql(void)
os_fast_mutex_free(&srv_os_test_mutex);
if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s started; "
- "log sequence number " LSN_PF "\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s started; log sequence number " LSN_PF "",
INNODB_VERSION_STR, srv_start_lsn);
}
if (srv_force_recovery > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!! innodb_force_recovery"
- " is set to %lu !!!\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "!!! innodb_force_recovery is set to %lu !!!",
(ulong) srv_force_recovery);
}
- fflush(stderr);
-
if (srv_force_recovery == 0) {
/* In the insert buffer we may have even bigger tablespace
id's, because we may have dropped those tablespaces, but
@@ -2409,16 +2728,20 @@ innobase_start_or_create_for_mysql(void)
ibuf_update_max_tablespace_id();
}
- /* Create the buffer pool dump/load thread */
- os_thread_create(buf_dump_thread, NULL, NULL);
+ if (!srv_read_only_mode) {
+ /* Create the buffer pool dump/load thread */
+ os_thread_create(buf_dump_thread, NULL, NULL);
- srv_was_started = TRUE;
+ /* Create the dict stats gathering thread */
+ os_thread_create(dict_stats_thread, NULL, NULL);
- /* Create the thread that will optimize the FTS sub-system
- in a separate background thread. */
- fts_optimize_init();
+ /* Create the thread that will optimize the FTS sub-system. */
+ fts_optimize_init();
+ }
- return((int) DB_SUCCESS);
+ srv_was_started = TRUE;
+
+ return(DB_SUCCESS);
}
#if 0
@@ -2455,27 +2778,28 @@ srv_fts_close(void)
Shuts down the InnoDB database.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_shutdown_for_mysql(void)
/*=============================*/
{
ulint i;
+
if (!srv_was_started) {
if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down"
- " a not properly started\n"
- "InnoDB: or created database!\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Shutting down an improperly started, "
+ "or created database!");
}
return(DB_SUCCESS);
}
- /* Shutdown the FTS optimize sub system. */
- fts_optimize_start_shutdown();
+ if (!srv_read_only_mode) {
+ /* Shutdown the FTS optimize sub system. */
+ fts_optimize_start_shutdown();
- fts_optimize_end();
+ fts_optimize_end();
+ }
/* 1. Flush the buffer pool to disk, write the current lsn to
the tablespace header(s), and copy all log data to archive.
@@ -2485,18 +2809,12 @@ innobase_shutdown_for_mysql(void)
logs_empty_and_mark_files_at_shutdown();
if (srv_conc_get_active_threads() != 0) {
- fprintf(stderr,
- "InnoDB: Warning: query counter shows %ld queries"
- " still\n"
- "InnoDB: inside InnoDB at shutdown\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Query counter shows %ld queries still "
+ "inside InnoDB at shutdown",
srv_conc_get_active_threads());
}
- /* This functionality will be used by WL#5522. */
- ut_a(trx_purge_state() == PURGE_STATE_RUN
- || trx_purge_state() == PURGE_STATE_EXIT
- || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
-
/* 2. Make all threads created by InnoDB to exit */
srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
@@ -2509,22 +2827,28 @@ innobase_shutdown_for_mysql(void)
/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
HERE OR EARLIER */
- /* a. Let the lock timeout thread exit */
- os_event_set(srv_timeout_event);
+ if (!srv_read_only_mode) {
+ /* a. Let the lock timeout thread exit */
+ os_event_set(lock_sys->timeout_event);
- /* b. srv error monitor thread exits automatically, no need
- to do anything here */
+ /* b. srv error monitor thread exits automatically,
+ no need to do anything here */
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
+ /* c. We wake the master thread so that it exits */
+ srv_wake_master_thread();
- /* d. Wakeup purge threads. */
- srv_purge_wakeup();
+ /* d. Wakeup purge threads. */
+ srv_purge_wakeup();
+ }
/* e. Exit the i/o threads */
os_aio_wake_all_threads_at_shutdown();
+ /* f. dict_stats_thread is signaled from
+ logs_empty_and_mark_files_at_shutdown() and should have
+ already quit or is quitting right now. */
+
os_mutex_enter(os_sync_mutex);
if (os_thread_count == 0) {
@@ -2549,9 +2873,9 @@ innobase_shutdown_for_mysql(void)
}
if (i == 1000) {
- fprintf(stderr,
- "InnoDB: Warning: %lu threads created by InnoDB"
- " had not exited at shutdown!\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "%lu threads created by InnoDB"
+ " had not exited at shutdown!",
(ulong) os_thread_count);
}
@@ -2563,6 +2887,7 @@ innobase_shutdown_for_mysql(void)
mem_free(srv_monitor_file_name);
}
}
+
if (srv_dict_tmpfile) {
fclose(srv_dict_tmpfile);
srv_dict_tmpfile = 0;
@@ -2573,6 +2898,10 @@ innobase_shutdown_for_mysql(void)
srv_misc_tmpfile = 0;
}
+ if (!srv_read_only_mode) {
+ dict_stats_thread_deinit();
+ }
+
/* This must be disabled before closing the buffer pool
and closing the data dictionary. */
btr_search_disable();
@@ -2583,9 +2912,14 @@ innobase_shutdown_for_mysql(void)
trx_sys_file_format_close();
trx_sys_close();
- mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
+	/* We don't create these mutexes in RO mode because we don't create
+	the temp files that they cover. */
+ if (!srv_read_only_mode) {
+ mutex_free(&srv_monitor_file_mutex);
+ mutex_free(&srv_dict_tmpfile_mutex);
+ mutex_free(&srv_misc_tmpfile_mutex);
+ }
+
dict_close();
btr_search_sys_free();
@@ -2594,6 +2928,7 @@ innobase_shutdown_for_mysql(void)
os_aio_free();
que_close();
row_mysql_close();
+ srv_mon_free();
sync_close();
srv_free();
fil_close();
@@ -2618,11 +2953,10 @@ innobase_shutdown_for_mysql(void)
|| os_event_count != 0
|| os_mutex_count != 0
|| os_fast_mutex_count != 0) {
- fprintf(stderr,
- "InnoDB: Warning: some resources were not"
- " cleaned up in shutdown:\n"
- "InnoDB: threads %lu, events %lu,"
- " os_mutexes %lu, os_fast_mutexes %lu\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Some resources were not cleaned up in shutdown: "
+ "threads %lu, events %lu, os_mutexes %lu, "
+ "os_fast_mutexes %lu",
(ulong) os_thread_count, (ulong) os_event_count,
(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
}
@@ -2632,17 +2966,15 @@ innobase_shutdown_for_mysql(void)
}
if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Shutdown completed;"
- " log sequence number " LSN_PF "\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Shutdown completed; log sequence number " LSN_PF "",
srv_shutdown_lsn);
}
srv_was_started = FALSE;
srv_start_has_been_called = FALSE;
- return((int) DB_SUCCESS);
+ return(DB_SUCCESS);
}
#endif /* !UNIV_HOTBACKUP */
@@ -2650,7 +2982,7 @@ innobase_shutdown_for_mysql(void)
/********************************************************************
Signal all per-table background threads to shutdown, and wait for them to do
so. */
-
+UNIV_INTERN
void
srv_shutdown_table_bg_threads(void)
/*===============================*/
@@ -2723,3 +3055,48 @@ srv_shutdown_table_bg_threads(void)
table = next;
}
}
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*=======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+{
+ ulint len;
+ char* path;
+ char* suffix;
+ static const ulint suffix_len = strlen(".cfg");
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ path = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "cfg");
+ } else {
+ path = fil_make_ibd_name(table->name, false);
+ }
+
+ ut_a(path);
+ len = ut_strlen(path);
+ ut_a(max_len >= len);
+
+ suffix = path + (len - suffix_len);
+ if (strncmp(suffix, ".cfg", suffix_len) == 0) {
+ strcpy(filename, path);
+ } else {
+ ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
+
+ strncpy(filename, path, len - suffix_len);
+ suffix = filename + (len - suffix_len);
+ strcpy(suffix, ".cfg");
+ }
+
+ mem_free(path);
+
+ srv_normalize_path_for_win(filename);
+}
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index b90a5f29589..749258021f7 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -39,6 +39,7 @@ Created 9/5/1995 Heikki Tuuri
#include "sync0rw.h"
#include "os0sync.h"
#include "os0file.h"
+#include "lock0lock.h"
#include "srv0srv.h"
#include "ha_prototypes.h"
@@ -78,11 +79,11 @@ any waiting threads who have missed the signal. */
/** A cell where an individual thread may wait suspended
until a resource is released. The suspending is implemented
using an operating system event semaphore. */
-struct sync_cell_struct {
+struct sync_cell_t {
void* wait_object; /*!< pointer to the object the
thread is waiting for; if NULL
the cell is free for use */
- mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
+ ib_mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
rw_lock_t* old_wait_rw_lock;
/*!< the latest wait rw-lock
in cell */
@@ -116,15 +117,15 @@ all changes (set or reset) to the state of the event must be made
while owning the mutex. */
/** Synchronization array */
-struct sync_array_struct {
+struct sync_array_t {
ulint n_reserved; /*!< number of currently reserved
cells in the wait array */
ulint n_cells; /*!< number of cells in the
wait array */
sync_cell_t* array; /*!< pointer to wait array */
- mutex_t mutex; /*!< possible database mutex
+ ib_mutex_t mutex; /*!< possible database mutex
protecting this data structure */
- os_mutex_t os_mutex; /*!< Possible operating system mutex
+ os_ib_mutex_t os_mutex; /*!< Possible operating system mutex
protecting the data structure.
As this data structure is used in
constructing the database mutex,
@@ -293,7 +294,7 @@ sync_cell_get_event(
ulint type = cell->request_type;
if (type == SYNC_MUTEX) {
- return(((mutex_t*) cell->wait_object)->event);
+ return(((ib_mutex_t*) cell->wait_object)->event);
} else if (type == RW_LOCK_WAIT_EX) {
return(((rw_lock_t*) cell->wait_object)->wait_ex_event);
} else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
@@ -434,7 +435,7 @@ sync_array_cell_print(
FILE* file, /*!< in: file where to print */
sync_cell_t* cell) /*!< in: sync cell */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* rwlock;
ulint type;
ulint writer;
@@ -600,7 +601,7 @@ sync_array_detect_deadlock(
sync_cell_t* cell, /*!< in: cell to search */
ulint depth) /*!< in: recursion depth */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
os_thread_id_t thread;
ibool ret;
@@ -622,7 +623,7 @@ sync_array_detect_deadlock(
if (cell->request_type == SYNC_MUTEX) {
- mutex = static_cast<mutex_t*>(cell->wait_object);
+ mutex = static_cast<ib_mutex_t*>(cell->wait_object);
if (mutex_get_lock_word(mutex) != 0) {
@@ -736,7 +737,7 @@ sync_arr_cell_can_wake_up(
/*======================*/
sync_cell_t* cell) /*!< in: cell to search */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
if (cell->request_type == SYNC_MUTEX) {
@@ -902,6 +903,11 @@ sync_array_print_long_waits_low(
ibool fatal = FALSE;
double longest_diff = 0;
+ /* For huge tables, skip the check during CHECK TABLE etc... */
+ if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
+ return(FALSE);
+ }
+
#ifdef UNIV_DEBUG_VALGRIND
/* Increase the timeouts if running under valgrind because it executes
extremely slowly. UNIV_DEBUG_VALGRIND does not necessary mean that
@@ -1000,7 +1006,7 @@ sync_array_print_long_waits(
(ulong) os_file_n_pending_pwrites);
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
os_thread_sleep(30000000);
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index dc6c510a3ed..823efecaf6b 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -57,11 +57,11 @@ lock_word == 0: Write locked
(-lock_word) is the number of readers
that hold the lock.
lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR once for each lock,
- so the number of locks is:
- ((-lock_word) / X_LOCK_DECR) + 1
-When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
-other values of lock_word are invalid.
+ decremented by X_LOCK_DECR for the first lock
+ and the first recursive lock, then by 1 for
+ each recursive lock thereafter.
+ So the number of locks is:
+ (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR)
The lock_word is always read and updated atomically and consistently, so that
it always represents the state of the lock, and the state of the lock changes
@@ -124,50 +124,21 @@ wait_ex_event: A thread may only wait on the wait_ex_event after it has
performed the following actions in order:
(1) Decrement lock_word by X_LOCK_DECR.
(2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
+ called from sync_array_reserve_cell).
(3) Verify that lock_word < 0.
(1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
+ or next writer, and notifies unlocker that signal must be sent.
+ (2) must come before (3) to ensure the signal is not missed.
These restrictions force the above ordering.
Immediately before sending the wake-up signal, we should:
Verify lock_word == 0 (waiting thread holds x_lock)
*/
-
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_os_wait_count = 0;
-
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_s_exit_count = 0;
-
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_os_wait_count = 0;
-
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_x_exit_count = 0;
+UNIV_INTERN rw_lock_stats_t rw_lock_stats;
/* The global list of rw-locks */
UNIV_INTERN rw_lock_list_t rw_lock_list;
-UNIV_INTERN mutex_t rw_lock_list_mutex;
+UNIV_INTERN ib_mutex_t rw_lock_list_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key;
@@ -179,7 +150,7 @@ UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key;
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-UNIV_INTERN mutex_t rw_lock_debug_mutex;
+UNIV_INTERN ib_mutex_t rw_lock_debug_mutex;
# ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key;
@@ -258,7 +229,7 @@ rw_lock_create_func(
lock->mutex.cline = cline;
ut_d(lock->mutex.cmutex_name = cmutex_name);
- ut_d(lock->mutex.mutex_type = 1);
+ ut_d(lock->mutex.ib_mutex_type = 1);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
# ifdef UNIV_DEBUG
UT_NOT_USED(cmutex_name);
@@ -292,8 +263,8 @@ rw_lock_create_func(
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
- lock->event = os_event_create(NULL);
- lock->wait_ex_event = os_event_create(NULL);
+ lock->event = os_event_create();
+ lock->wait_ex_event = os_event_create();
mutex_enter(&rw_lock_list_mutex);
@@ -316,7 +287,7 @@ rw_lock_free_func(
rw_lock_t* lock) /*!< in: rw-lock */
{
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t* mutex;
+ ib_mutex_t* mutex;
#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
ut_ad(rw_lock_validate(lock));
@@ -364,14 +335,15 @@ rw_lock_validate(
ulint waiters;
lint lock_word;
- ut_a(lock);
+ ut_ad(lock);
waiters = rw_lock_get_waiters(lock);
lock_word = lock->lock_word;
ut_ad(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_a(waiters == 0 || waiters == 1);
- ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
+ ut_ad(waiters == 0 || waiters == 1);
+ ut_ad(lock_word > -(2 * X_LOCK_DECR));
+ ut_ad(lock_word <= X_LOCK_DECR);
return(TRUE);
}
@@ -395,10 +367,16 @@ rw_lock_s_lock_spin(
ulint index; /* index of the reserved wait cell */
ulint i = 0; /* spin round count */
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(rw_lock_validate(lock));
- rw_s_spin_wait_count++; /*!< Count calls to this function */
+ rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1);
lock_loop:
/* Spin waiting for the writer field to become free */
@@ -414,19 +392,9 @@ lock_loop:
os_thread_yield();
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-s-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline, (ulong) i);
- }
-
/* We try once again to obtain the lock */
if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- rw_s_spin_round_count += i;
+ rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
return; /* Success */
} else {
@@ -435,7 +403,7 @@ lock_loop:
goto lock_loop;
}
- rw_s_spin_round_count += i;
+ rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -444,7 +412,7 @@ lock_loop:
file_name, line, &index);
/* Set waiters before checking lock_word to ensure wake-up
- signal is sent. This may lead to some unnecessary signals. */
+ signal is sent. This may lead to some unnecessary signals. */
rw_lock_set_waiter_flag(lock);
if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
@@ -452,19 +420,9 @@ lock_loop:
return; /* Success */
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait rw-s-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline);
- }
-
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_s_os_wait_count++;
+ rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1);
sync_array_wait_event(sync_arr, index);
@@ -511,6 +469,12 @@ rw_lock_x_lock_wait(
ulint index;
ulint i = 0;
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(lock->lock_word <= 0);
@@ -524,7 +488,7 @@ rw_lock_x_lock_wait(
}
/* If there is still a reader, then go to sleep.*/
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -539,11 +503,11 @@ rw_lock_x_lock_wait(
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_x_os_wait_count++;
+ rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
- /* Add debug info as it is needed to detect possible
- deadlock. We must add info for WAIT_EX thread for
- deadlock detection to work properly. */
+ /* Add debug info as it is needed to detect possible
+ deadlock. We must add info for WAIT_EX thread for
+ deadlock detection to work properly. */
#ifdef UNIV_SYNC_DEBUG
rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
file_name, line);
@@ -551,16 +515,16 @@ rw_lock_x_lock_wait(
sync_array_wait_event(sync_arr, index);
#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass,
- RW_LOCK_WAIT_EX);
+ rw_lock_remove_debug_info(
+ lock, pass, RW_LOCK_WAIT_EX);
#endif
- /* It is possible to wake when lock_word < 0.
- We must pass the while-loop check to proceed.*/
+ /* It is possible to wake when lock_word < 0.
+ We must pass the while-loop check to proceed.*/
} else {
sync_array_free_cell(sync_arr, index);
}
}
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
}
/******************************************************************//**
@@ -576,8 +540,6 @@ rw_lock_x_lock_low(
const char* file_name,/*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
/* lock->recursive also tells us if the writer_thread
@@ -587,8 +549,8 @@ rw_lock_x_lock_low(
ut_a(!lock->recursive);
/* Decrement occurred: we are writer or next-writer. */
- rw_lock_set_writer_id_and_recursion_flag(lock,
- pass ? FALSE : TRUE);
+ rw_lock_set_writer_id_and_recursion_flag(
+ lock, pass ? FALSE : TRUE);
rw_lock_x_lock_wait(lock,
#ifdef UNIV_SYNC_DEBUG
@@ -597,19 +559,25 @@ rw_lock_x_lock_low(
file_name, line);
} else {
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+
/* Decrement failed: relock or failed lock */
if (!pass && lock->recursive
- && os_thread_eq(lock->writer_thread, curr_thread)) {
+ && os_thread_eq(lock->writer_thread, thread_id)) {
/* Relock */
- lock->lock_word -= X_LOCK_DECR;
+ if (lock->lock_word == 0) {
+ lock->lock_word -= X_LOCK_DECR;
+ } else {
+ --lock->lock_word;
+ }
+
} else {
/* Another thread locked before us */
return(FALSE);
}
}
#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line);
#endif
lock->last_x_file_name = file_name;
lock->last_x_line = (unsigned int) line;
@@ -640,6 +608,12 @@ rw_lock_x_lock_func(
ulint index; /*!< index of the reserved wait cell */
sync_array_t* sync_arr;
ibool spinning = FALSE;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(rw_lock_validate(lock));
#ifdef UNIV_SYNC_DEBUG
@@ -651,15 +625,17 @@ rw_lock_x_lock_func(
lock_loop:
if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
return; /* Locking succeeded */
} else {
- if (!spinning) {
- spinning = TRUE;
- rw_x_spin_wait_count++;
+ if (!spinning) {
+ spinning = TRUE;
+
+ rw_lock_stats.rw_x_spin_wait_count.add(
+ counter_index, 1);
}
/* Spin waiting for the lock_word to become free */
@@ -679,16 +655,7 @@ lock_loop:
}
}
- rw_x_spin_round_count += i;
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-x-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline, (ulong) i);
- }
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -704,18 +671,9 @@ lock_loop:
return; /* Locking succeeded */
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait for rw-x-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline);
- }
-
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_x_os_wait_count++;
+ rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
sync_array_wait_event(sync_arr, index);
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index af64d011db2..d6f7325e2a3 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -171,25 +171,25 @@ Q.E.D. */
/** The number of iterations in the mutex_spin_wait() spin loop.
Intended for performance monitoring. */
-static ib_int64_t mutex_spin_round_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
/** The number of mutex_spin_wait() calls. Intended for
performance monitoring. */
-static ib_int64_t mutex_spin_wait_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
/** The number of OS waits in mutex_spin_wait(). Intended for
performance monitoring. */
-static ib_int64_t mutex_os_wait_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
/** The number of mutex_exit() calls. Intended for performance
monitoring. */
-UNIV_INTERN ib_int64_t mutex_exit_count = 0;
+UNIV_INTERN ib_int64_t mutex_exit_count;
/** This variable is set to TRUE when sync_init is called */
UNIV_INTERN ibool sync_initialized = FALSE;
#ifdef UNIV_SYNC_DEBUG
/** An acquired mutex or rw-lock and its level in the latching order */
-typedef struct sync_level_struct sync_level_t;
+struct sync_level_t;
/** Mutexes or rw-locks held by a thread */
-typedef struct sync_thread_struct sync_thread_t;
+struct sync_thread_t;
/** The latch levels currently owned by threads are stored in this data
structure; the size of this array is OS_THREAD_MAX_N */
@@ -197,7 +197,7 @@ structure; the size of this array is OS_THREAD_MAX_N */
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
/** Mutex protecting sync_thread_level_arrays */
-UNIV_INTERN mutex_t sync_thread_mutex;
+UNIV_INTERN ib_mutex_t sync_thread_mutex;
# ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
@@ -208,7 +208,7 @@ UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
UNIV_INTERN ut_list_base_node_t mutex_list;
/** Mutex protecting the mutex_list variable */
-UNIV_INTERN mutex_t mutex_list_mutex;
+UNIV_INTERN ib_mutex_t mutex_list_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
@@ -221,10 +221,8 @@ UNIV_INTERN ibool sync_order_checks_on = FALSE;
/** Number of slots reserved for each OS thread in the sync level array */
static const ulint SYNC_THREAD_N_LEVELS = 10000;
-typedef struct sync_arr_struct sync_arr_t;
-
/** Array for tracking sync levels per thread. */
-struct sync_arr_struct {
+struct sync_arr_t {
ulint in_use; /*!< Number of active cells */
ulint n_elems; /*!< Number of elements in the array */
ulint max_elems; /*!< Maximum elements */
@@ -234,14 +232,14 @@ struct sync_arr_struct {
};
/** Mutexes or rw-locks held by a thread */
-struct sync_thread_struct{
+struct sync_thread_t{
os_thread_id_t id; /*!< OS thread id */
sync_arr_t* levels; /*!< level array for this thread; if
this is NULL this slot is unused */
};
/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_struct{
+struct sync_level_t{
void* latch; /*!< pointer to a mutex or an
rw-lock; NULL means that
the slot is empty */
@@ -264,7 +262,7 @@ UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -280,7 +278,7 @@ mutex_create_func(
os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex);
mutex->lock_word = 0;
#endif
- mutex->event = os_event_create(NULL);
+ mutex->event = os_event_create();
mutex_set_waiters(mutex, 0);
#ifdef UNIV_DEBUG
mutex->magic_n = MUTEX_MAGIC_N;
@@ -293,16 +291,6 @@ mutex_create_func(
mutex->cfile_name = cfile_name;
mutex->cline = cline;
mutex->count_os_wait = 0;
-#ifdef UNIV_DEBUG
- mutex->cmutex_name= cmutex_name;
- mutex->count_using= 0;
- mutex->mutex_type= 0;
- mutex->lspent_time= 0;
- mutex->lmax_spent_time= 0;
- mutex->count_spin_loop= 0;
- mutex->count_spin_rounds= 0;
- mutex->count_os_yield= 0;
-#endif /* UNIV_DEBUG */
/* Check that lock_word is aligned; this is important on Intel */
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
@@ -337,7 +325,7 @@ UNIV_INTERN
void
mutex_free_func(
/*============*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
ut_a(mutex_get_lock_word(mutex) == 0);
@@ -397,7 +385,7 @@ UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name __attribute__((unused)),
/*!< in: file name where mutex
requested */
@@ -406,7 +394,7 @@ mutex_enter_nowait_func(
{
ut_ad(mutex_validate(mutex));
- if (!mutex_test_and_set(mutex)) {
+ if (!ib_mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
@@ -427,7 +415,7 @@ UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_a(mutex);
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
@@ -443,7 +431,7 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
@@ -458,7 +446,7 @@ UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
ulint n) /*!< in: value to set */
{
volatile ulint* ptr; /* declared volatile to ensure that
@@ -479,7 +467,7 @@ UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
@@ -487,6 +475,9 @@ mutex_spin_wait(
ulint i; /* spin round count */
ulint index; /* index of the reserved wait cell */
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(mutex);
@@ -494,7 +485,7 @@ mutex_spin_wait(
isn't exact. Moved out of ifdef that follows because we are willing
to sacrifice the cost of counting this as the data is valuable.
Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count++;
+ mutex_spin_wait_count.add(counter_index, 1);
mutex_loop:
@@ -507,7 +498,6 @@ mutex_loop:
a memory word. */
spin_loop:
- ut_d(mutex->count_spin_loop++);
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
if (srv_spin_wait_delay) {
@@ -518,26 +508,12 @@ spin_loop:
}
if (i == SYNC_SPIN_ROUNDS) {
-#ifdef UNIV_DEBUG
- mutex->count_os_yield++;
-#endif /* UNIV_DEBUG */
os_thread_yield();
}
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu spin wait mutex at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_spin_round_count += i;
-
- ut_d(mutex->count_spin_rounds += i);
+ mutex_spin_round_count.add(counter_index, i);
- if (mutex_test_and_set(mutex) == 0) {
+ if (ib_mutex_test_and_set(mutex) == 0) {
/* Succeeded! */
ut_d(mutex->thread_id = os_thread_get_curr_id());
@@ -550,7 +526,7 @@ spin_loop:
/* We may end up with a situation where lock_word is 0 but the OS
fast mutex is still reserved. On FreeBSD the OS does not seem to
schedule a thread which is constantly calling pthread_mutex_trylock
- (in mutex_test_and_set implementation). Then we could end up
+ (in ib_mutex_test_and_set implementation). Then we could end up
spinning here indefinitely. The following 'i++' stops this infinite
spin. */
@@ -575,7 +551,7 @@ spin_loop:
/* Try to reserve still a few times */
for (i = 0; i < 4; i++) {
- if (mutex_test_and_set(mutex) == 0) {
+ if (ib_mutex_test_and_set(mutex) == 0) {
/* Succeeded! Free the reserved wait cell */
sync_array_free_cell(sync_arr, index);
@@ -585,13 +561,6 @@ spin_loop:
mutex_set_debug_info(mutex, file_name, line);
#endif
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
- " mutex at %p\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) mutex);
-#endif
-
return;
/* Note that in this case we leave the waiters field
@@ -604,19 +573,12 @@ spin_loop:
after the change in the wait array and the waiters field was made.
Now there is no risk of infinite wait on the event. */
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_os_wait_count++;
+ mutex_os_wait_count.add(counter_index, 1);
mutex->count_os_wait++;
sync_array_wait_event(sync_arr, index);
+
goto mutex_loop;
}
@@ -626,7 +588,7 @@ UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
mutex_set_waiters(mutex, 0);
@@ -643,7 +605,7 @@ UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char* file_name, /*!< in: file where requested */
ulint line) /*!< in: line where requested */
{
@@ -662,7 +624,7 @@ UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char** file_name, /*!< out: file where requested */
ulint* line, /*!< out: line where requested */
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
@@ -683,7 +645,7 @@ mutex_list_print_info(
/*==================*/
FILE* file) /*!< in: file where to print */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
const char* file_name;
ulint line;
os_thread_id_t thread_id;
@@ -726,7 +688,7 @@ ulint
mutex_n_reserved(void)
/*==================*/
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
ulint count = 0;
mutex_enter(&mutex_list_mutex);
@@ -825,9 +787,9 @@ sync_print_warning(
const sync_level_t* slot) /*!< in: slot for which to
print warning */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
- mutex = static_cast<mutex_t*>(slot->latch);
+ mutex = static_cast<ib_mutex_t*>(slot->latch);
if (mutex->magic_n == MUTEX_MAGIC_N) {
fprintf(stderr,
@@ -1200,6 +1162,8 @@ sync_thread_add_level(
case SYNC_TRX_I_S_RWLOCK:
case SYNC_TRX_I_S_LAST_READ:
case SYNC_IBUF_MUTEX:
+ case SYNC_INDEX_ONLINE_LOG:
+ case SYNC_STATS_AUTO_RECALC:
if (!sync_thread_levels_g(array, level, TRUE)) {
fprintf(stderr,
"InnoDB: sync_thread_levels_g(array, %lu)"
@@ -1448,7 +1412,7 @@ sync_thread_reset_level(
return(TRUE);
}
- if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
+ if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
rw_lock_t* rw_lock;
rw_lock = (rw_lock_t*) latch;
@@ -1511,7 +1475,7 @@ sync_init(void)
mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex,
SYNC_NO_ORDER_CHECK);
- rw_lock_debug_event = os_event_create(NULL);
+ rw_lock_debug_event = os_event_create();
rw_lock_debug_waiters = FALSE;
#endif /* UNIV_SYNC_DEBUG */
}
@@ -1552,7 +1516,7 @@ void
sync_close(void)
/*===========*/
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
sync_array_close();
@@ -1569,7 +1533,7 @@ sync_close(void)
mutex_free(mutex);
- mutex = UT_LIST_GET_FIRST(mutex_list);
+ mutex = UT_LIST_GET_FIRST(mutex_list);
}
mutex_free(&mutex_list_mutex);
@@ -1593,13 +1557,6 @@ sync_print_wait_info(
/*=================*/
FILE* file) /*!< in: file where to print */
{
-#ifdef UNIV_SYNC_DEBUG
- fprintf(file,
- "Mutex exits "UINT64PF", "
- "rws exits "UINT64PF", rwx exits "UINT64PF"\n",
- mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
-#endif
-
fprintf(file,
"Mutex spin waits "UINT64PF", rounds "UINT64PF", "
"OS waits "UINT64PF"\n"
@@ -1607,25 +1564,27 @@ sync_print_wait_info(
"OS waits "UINT64PF"\n"
"RW-excl spins "UINT64PF", rounds "UINT64PF", "
"OS waits "UINT64PF"\n",
- mutex_spin_wait_count,
- mutex_spin_round_count,
- mutex_os_wait_count,
- rw_s_spin_wait_count,
- rw_s_spin_round_count,
- rw_s_os_wait_count,
- rw_x_spin_wait_count,
- rw_x_spin_round_count,
- rw_x_os_wait_count);
+ (ib_uint64_t) mutex_spin_wait_count,
+ (ib_uint64_t) mutex_spin_round_count,
+ (ib_uint64_t) mutex_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count);
fprintf(file,
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
"%.2f RW-excl\n",
(double) mutex_spin_round_count /
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
- (double) rw_s_spin_round_count /
- (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
- (double) rw_x_spin_round_count /
- (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
+ (double) rw_lock_stats.rw_s_spin_round_count /
+ (rw_lock_stats.rw_s_spin_wait_count
+ ? rw_lock_stats.rw_s_spin_wait_count : 1),
+ (double) rw_lock_stats.rw_x_spin_round_count /
+ (rw_lock_stats.rw_x_spin_wait_count
+ ? rw_lock_stats.rw_x_spin_wait_count : 1));
}
/*******************************************************************//**
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index cbf90afae0d..f6360562ae7 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -131,25 +131,25 @@ noop because it will be empty. */
/** Memory for each table in the intermediate buffer is allocated in
separate chunks. These chunks are considered to be concatenated to
represent one flat array of rows. */
-typedef struct i_s_mem_chunk_struct {
+struct i_s_mem_chunk_t {
ulint offset; /*!< offset, in number of rows */
ulint rows_allocd; /*!< the size of this chunk, in number
of rows */
void* base; /*!< start of the chunk */
-} i_s_mem_chunk_t;
+};
/** This represents one table's cache. */
-typedef struct i_s_table_cache_struct {
+struct i_s_table_cache_t {
ulint rows_used; /*!< number of used rows */
ulint rows_allocd; /*!< number of allocated rows */
ulint row_size; /*!< size of a single row */
i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
memory chunks that stores the
rows */
-} i_s_table_cache_t;
+};
/** This structure describes the intermediate buffer */
-struct trx_i_s_cache_struct {
+struct trx_i_s_cache_t {
rw_lock_t rw_lock; /*!< read-write lock protecting
the rest of this structure */
ullint last_read; /*!< last time the cache was read;
@@ -501,8 +501,7 @@ fill_trx_row(
goto thd_done;
}
- row->trx_mysql_thread_id = thd_get_thread_id(
- static_cast<const THD*>(trx->mysql_thd));
+ row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd);
stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
@@ -1290,7 +1289,10 @@ fetch_data_into_cache_low(
for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ trx =
+ (trx_list == &trx_sys->mysql_trx_list
+ ? UT_LIST_GET_NEXT(mysql_trx_list, trx)
+ : UT_LIST_GET_NEXT(trx_list, trx))) {
i_s_trx_row_t* trx_row;
i_s_locks_row_t* requested_lock_row;
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 62c140879aa..f6d8dfc6b40 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -69,19 +69,9 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/********************************************************************//**
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function.
-@return copy of an undo log record or pointer to trx_purge_dummy_rec,
-if the whole undo log can skipped in purge; NULL if none left */
-static
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */
- ulint* n_pages_handled,/*!< in/out: number of UNDO log pages
- handled */
- mem_heap_t* heap); /*!< in: memory heap where copied */
+#ifdef UNIV_DEBUG
+UNIV_INTERN my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
/****************************************************************//**
Builds a purge 'query' graph. The actual purge is performed by executing
@@ -129,7 +119,7 @@ trx_purge_sys_create(
purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));
purge_sys->state = PURGE_STATE_INIT;
- purge_sys->event = os_event_create("purge");
+ purge_sys->event = os_event_create();
/* Take ownership of ib_bh, we are responsible for freeing it. */
purge_sys->ib_bh = ib_bh;
@@ -539,7 +529,6 @@ trx_purge_truncate_history(
}
}
-
/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
@@ -703,7 +692,7 @@ trx_purge_get_rseg_with_min_trx_id(
/* We assume in purge of externally stored fields that space id is
in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space <= srv_undo_tablespaces);
+ ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open);
zip_size = purge_sys->rseg->zip_size;
@@ -924,7 +913,7 @@ Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can skipped in purge; NULL if none left */
-static
+static __attribute__((warn_unused_result, nonnull))
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
@@ -1215,6 +1204,12 @@ trx_purge(
rw_lock_x_unlock(&purge_sys->latch);
+#ifdef UNIV_DEBUG
+ if (srv_purge_view_update_only_debug) {
+ return(0);
+ }
+#endif
+
/* Fetch the UNDO recs that need to be purged. */
n_pages_handled = trx_purge_attach_undo_recs(
n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
@@ -1260,6 +1255,14 @@ run_synchronously:
ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+#ifdef UNIV_DEBUG
+ if (purge_sys->limit.trx_no == 0) {
+ purge_sys->done = purge_sys->iter;
+ } else {
+ purge_sys->done = purge_sys->limit;
+ }
+#endif /* UNIV_DEBUG */
+
if (truncate) {
trx_purge_truncate();
}
@@ -1305,14 +1308,14 @@ trx_purge_stop(void)
ut_a(purge_sys->state != PURGE_STATE_INIT);
ut_a(purge_sys->state != PURGE_STATE_EXIT);
+ ut_a(purge_sys->state != PURGE_STATE_DISABLED);
++purge_sys->n_stop;
state = purge_sys->state;
if (state == PURGE_STATE_RUN) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Stopping purge.\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge");
/* We need to wakeup the purge thread in case it is suspended,
so that it can acknowledge the state change. */
@@ -1329,6 +1332,28 @@ trx_purge_stop(void)
/* Wait for purge coordinator to signal that it
is suspended. */
os_event_wait_low(purge_sys->event, sig_count);
+ } else {
+ bool once = true;
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ /* Wait for purge to signal that it has actually stopped. */
+ while (purge_sys->running) {
+
+ if (once) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for purge to stop");
+ once = false;
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ os_thread_sleep(10000);
+
+ rw_lock_x_lock(&purge_sys->latch);
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
}
MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
@@ -1343,8 +1368,16 @@ trx_purge_run(void)
{
rw_lock_x_lock(&purge_sys->latch);
- ut_a(purge_sys->state != PURGE_STATE_INIT);
- ut_a(purge_sys->state != PURGE_STATE_EXIT);
+ switch(purge_sys->state) {
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_EXIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ break;
+ }
if (purge_sys->n_stop > 0) {
@@ -1354,8 +1387,7 @@ trx_purge_run(void)
if (purge_sys->n_stop == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Resuming purge.\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge");
purge_sys->state = PURGE_STATE_RUN;
}
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index b87eac9362e..a698b37c2a6 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -287,7 +287,7 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
table_id_t* table_id) /*!< out: table id */
@@ -300,12 +300,8 @@ trx_undo_rec_get_pars(
type_cmpl = mach_read_from_1(ptr);
ptr++;
- if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
- *updated_extern = TRUE;
- type_cmpl -= TRX_UNDO_UPD_EXTERN;
- } else {
- *updated_extern = FALSE;
- }
+ *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN);
+ type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
@@ -588,6 +584,7 @@ trx_undo_page_report_modify(
/* Store first some general parameters to the undo log */
if (!update) {
+ ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
type_cmpl = TRX_UNDO_DEL_MARK_REC;
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
type_cmpl = TRX_UNDO_UPD_DEL_REC;
@@ -1040,8 +1037,9 @@ trx_undo_update_rec_get_update(
}
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -1075,7 +1073,12 @@ trx_undo_rec_get_partial_row(
*row = dtuple_create(heap, row_len);
- dict_table_copy_types(*row, index->table);
+ /* Mark all columns in the row uninitialized, so that
+ we can distinguish missing fields from fields that are SQL NULL. */
+ for (ulint i = 0; i < row_len; i++) {
+ dfield_get_type(dtuple_get_nth_field(*row, i))
+ ->mtype = DATA_MISSING;
+ }
end_ptr = ptr + mach_read_from_2(ptr);
ptr += 2;
@@ -1097,7 +1100,9 @@ trx_undo_rec_get_partial_row(
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
dfield = dtuple_get_nth_field(*row, col_no);
-
+ dict_col_copy_type(
+ dict_table_get_nth_col(index->table, col_no),
+ dfield_get_type(dfield));
dfield_set_data(dfield, field, len);
if (len != UNIV_SQL_NULL
@@ -1177,7 +1182,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -1196,6 +1201,7 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: in case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
@@ -1207,16 +1213,14 @@ trx_undo_report_row_operation(
buf_block_t* undo_block;
trx_rseg_t* rseg;
mtr_t mtr;
- ulint err = DB_SUCCESS;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ dberr_t err = DB_SUCCESS;
#ifdef UNIV_DEBUG
int loop_count = 0;
#endif /* UNIV_DEBUG */
- rec_offs_init(offsets_);
+ ut_ad(!srv_read_only_mode);
ut_a(dict_index_is_clust(index));
+ ut_ad(!rec || rec_offs_validate(rec, index, offsets));
if (flags & BTR_NO_UNDO_LOG_FLAG) {
@@ -1230,6 +1234,17 @@ trx_undo_report_row_operation(
|| (clust_entry && !update && !rec));
trx = thr_get_trx(thr);
+
+ /* This table is visible only to the session that created it. */
+ if (trx->read_only) {
+ ut_ad(!srv_read_only_mode);
+ /* MySQL should block writes to non-temporary tables. */
+ ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY));
+ if (trx->rseg == 0) {
+ trx_assign_rseg(trx);
+ }
+ }
+
rseg = trx->rseg;
mtr_start(&mtr);
@@ -1272,8 +1287,6 @@ trx_undo_report_row_operation(
}
ut_ad(err == DB_SUCCESS);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
}
page_no = undo->last_page_no;
@@ -1352,8 +1365,7 @@ trx_undo_report_row_operation(
*roll_ptr = trx_undo_build_roll_ptr(
op_type == TRX_UNDO_INSERT_OP,
rseg->id, page_no, offset);
- err = DB_SUCCESS;
- goto func_exit;
+ return(DB_SUCCESS);
}
ut_ad(page_no == undo->last_page_no);
@@ -1380,10 +1392,6 @@ trx_undo_report_row_operation(
err_exit:
mutex_exit(&trx->undo_mutex);
mtr_commit(&mtr);
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
@@ -1428,39 +1436,34 @@ trx_undo_get_undo_rec_low(
/******************************************************************//**
Copies an undo record to heap.
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
+NOTE: the caller must have latches on the clustered index page.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-static
-ulint
+@retval true if the undo log has been
+truncated and we cannot fetch the old version
+@retval false if the undo log record is available */
+static __attribute__((nonnull, warn_unused_result))
+bool
trx_undo_get_undo_rec(
/*==================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
trx_id_t trx_id, /*!< in: id of the trx that generated
the roll pointer: it points to an
undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
+ trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */
mem_heap_t* heap) /*!< in: memory heap where copied */
{
- ibool missing_history;
+ bool missing_history;
rw_lock_s_lock(&purge_sys->latch);
missing_history = read_view_sees_trx_id(purge_sys->view, trx_id);
- rw_lock_s_unlock(&purge_sys->latch);
-
- if (UNIV_UNLIKELY(missing_history)) {
- /* It may be that the necessary undo log has already been
- deleted */
-
- return(DB_MISSING_HISTORY);
+ if (!missing_history) {
+ *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
+ rw_lock_s_unlock(&purge_sys->latch);
- return(DB_SUCCESS);
+ return(missing_history);
}
#ifdef UNIV_DEBUG
@@ -1471,13 +1474,13 @@ trx_undo_get_undo_rec(
/*******************************************************************//**
Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record, to
-guarantee that the stack of versions is locked all the way down to the
-purge_sys->view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed */
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG,
@@ -1488,7 +1491,7 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
rec_t** old_vers)/*!< out, own: previous version, or NULL if
@@ -1509,9 +1512,8 @@ trx_undo_prev_version_build(
byte* ptr;
ulint info_bits;
ulint cmpl_info;
- ibool dummy_extern;
+ bool dummy_extern;
byte* buf;
- ulint err;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -1526,28 +1528,28 @@ trx_undo_prev_version_build(
*old_vers = NULL;
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
/* The record rec is the first inserted version */
-
- return(DB_SUCCESS);
+ return(true);
}
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
- err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* The undo record may already have been purged.
- This should never happen for user transactions, but
- it can happen in purge. */
- ut_ad(err == DB_MISSING_HISTORY);
-
- return(err);
+ if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) {
+ /* The undo record may already have been purged,
+ during purge or semi-consistent read. */
+ return(false);
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
+ if (table_id != index->table->id) {
+ /* The table should have been rebuilt, but purge has
+ not yet removed the undo log records for the
+ now-dropped old table (table_id). */
+ return(true);
+ }
+
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
@@ -1578,7 +1580,6 @@ trx_undo_prev_version_build(
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
NULL, heap, &update);
- ut_a(table_id == index->table->id);
ut_a(ptr);
# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
@@ -1588,14 +1589,46 @@ trx_undo_prev_version_build(
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
+ /* We should confirm the existence of disowned external data,
+ if the previous version record is delete marked. If the trx_id
+ of the previous record is seen by purge view, we should treat
+ it as missing history, because the disowned external data
+ might be purged already.
+
+ The inherited external data (BLOBs) can be freed (purged)
+ after trx_id was committed, provided that no view was started
+ before trx_id. If the purge view can see the committed
+ delete-marked record by trx_id, no transactions need to access
+ the BLOB. */
+
+ /* the row_upd_changes_disowned_external(update) call could be
+ omitted, but the synchronization on purge_sys->latch is likely
+ more expensive. */
+
+ if ((update->info_bits & REC_INFO_DELETED_FLAG)
+ && row_upd_changes_disowned_external(update)) {
+ bool missing_extern;
+
+ rw_lock_s_lock(&purge_sys->latch);
+ missing_extern = read_view_sees_trx_id(purge_sys->view,
+ trx_id);
+ rw_lock_s_unlock(&purge_sys->latch);
+
+ if (missing_extern) {
+ /* treat as a fresh insert, not to
+ cause assertion error at the caller. */
+ return(true);
+ }
+ }
+
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
fields that update does NOT update, as well as the bits for
those fields that update updates to become externally stored
fields. Store the info: */
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
- offsets, &n_ext, heap);
+ entry = row_rec_to_index_entry(
+ rec, index, offsets, &n_ext, heap);
n_ext += btr_push_update_extern_fields(entry, update, heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
@@ -1618,6 +1651,6 @@ trx_undo_prev_version_build(
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
- return(DB_SUCCESS);
+ return(true);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index 042b5b87da7..d07e40c506d 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,7 +133,7 @@ trx_rollback_to_savepoint_low(
Rollback a transaction to a given savepoint or do a complete rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_to_savepoint(
/*======================*/
trx_t* trx, /*!< in: transaction handle */
@@ -157,14 +157,14 @@ trx_rollback_to_savepoint(
srv_active_wake_master_thread();
- return((int) trx->error_state);
+ return(trx->error_state);
}
/*******************************************************************//**
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
static
-enum db_err
+dberr_t
trx_rollback_for_mysql_low(
/*=======================*/
trx_t* trx) /*!< in/out: transaction */
@@ -193,7 +193,7 @@ trx_rollback_for_mysql_low(
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_for_mysql(
/*===================*/
trx_t* trx) /*!< in/out: transaction */
@@ -214,7 +214,7 @@ trx_rollback_for_mysql(
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -223,19 +223,19 @@ trx_rollback_for_mysql(
}
ut_error;
- return((int) DB_CORRUPTION);
+ return(DB_CORRUPTION);
}
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
trx_t* trx) /*!< in/out: transaction */
{
- int err;
+ dberr_t err;
/* We are reading trx->state without holding trx_sys->mutex
here, because the statement rollback should be invoked for a
@@ -344,8 +344,8 @@ the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_rollback_to_savepoint_for_mysql_low(
/*====================================*/
trx_t* trx, /*!< in/out: transaction */
@@ -358,7 +358,7 @@ trx_rollback_to_savepoint_for_mysql_low(
binlog entries of the queries
executed after the savepoint */
{
- ulint err;
+ dberr_t err;
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
ut_ad(trx->in_mysql_trx_list);
@@ -395,7 +395,7 @@ were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
@@ -449,7 +449,7 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
@@ -495,7 +495,7 @@ savepoint are left as is.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
@@ -623,18 +623,16 @@ trx_rollback_active(
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
&& trx->table_id != 0) {
- /* If the transaction was for a dictionary operation, we
- drop the relevant table, if it still exists */
+ /* If the transaction was for a dictionary operation,
+ we drop the relevant table only if it is not flagged
+ as DISCARDED. If it still exists. */
- fprintf(stderr,
- "InnoDB: Dropping table with id "UINT64PF
- " in recovery if it exists\n",
- (ib_uint64_t) trx->table_id);
+ table = dict_table_open_on_id(
+ trx->table_id, dictionary_locked, FALSE);
- table = dict_table_open_on_id(trx->table_id, dictionary_locked);
+ if (table && !dict_table_is_discarded(table)) {
- if (table) {
- ulint err;
+ dberr_t err;
/* Ensure that the table doesn't get evicted from the
cache, keeps things simple for drop. */
@@ -643,16 +641,17 @@ trx_rollback_active(
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, dictionary_locked);
+ dict_table_close(table, dictionary_locked, FALSE);
- fputs("InnoDB: Table found: dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" in recovery\n", stderr);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping table '%s', with id " UINT64PF " "
+ "in recovery",
+ table->name, trx->table_id);
err = row_drop_table_for_mysql(table->name, trx, TRUE);
trx_commit_for_mysql(trx);
- ut_a(err == (int) DB_SUCCESS);
+ ut_a(err == DB_SUCCESS);
}
}
@@ -660,9 +659,8 @@ trx_rollback_active(
row_mysql_unlock_data_dictionary(trx);
}
- fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
- " completed\n",
- trx->id);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
mem_heap_free(heap);
@@ -808,6 +806,8 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
/*!< in: a dummy parameter required by
os_thread_create */
{
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_PFS_THREAD
pfs_register_thread(trx_rollback_clean_thread_key);
#endif /* UNIV_PFS_THREAD */
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 97fd1f36943..7c2bbc90ad9 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,20 +43,16 @@ Created 3/26/1996 Heikki Tuuri
#include "log0recv.h"
#include "os0file.h"
#include "read0read.h"
-#include "buf0dblwr.h"
/** The file format tag structure with id and name. */
-struct file_format_struct {
+struct file_format_t {
ulint id; /*!< id of the file format */
const char* name; /*!< text representation of the
file format */
- mutex_t mutex; /*!< covers changes to the above
+ ib_mutex_t mutex; /*!< covers changes to the above
fields */
};
-/** The file format tag */
-typedef struct file_format_struct file_format_t;
-
/** The transaction system */
UNIV_INTERN trx_sys_t* trx_sys = NULL;
@@ -122,12 +118,12 @@ UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
#endif /* UNIV_PFS_RWLOCK */
+#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
uint trx_rseg_n_slots_debug = 0;
#endif
-#ifndef UNIV_HOTBACKUP
/** This is used to track the maximum file format id known to InnoDB. It's
updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
or create a table. */
@@ -180,13 +176,17 @@ trx_sys_flush_max_trx_id(void)
ut_ad(mutex_own(&trx_sys->mutex));
- mtr_start(&mtr);
+ if (!srv_read_only_mode) {
+ mtr_start(&mtr);
- sys_header = trx_sysf_get(&mtr);
+ sys_header = trx_sysf_get(&mtr);
- mlog_write_ull(sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
- mtr_commit(&mtr);
+ mlog_write_ull(
+ sys_header + TRX_SYS_TRX_ID_STORE,
+ trx_sys->max_trx_id, &mtr);
+
+ mtr_commit(&mtr);
+ }
}
/*****************************************************************//**
@@ -524,6 +524,8 @@ trx_sys_init_at_db_start(void)
+ TRX_SYS_TRX_ID_STORE),
TRX_SYS_TRX_ID_WRITE_MARGIN);
+ ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
+
UT_LIST_INIT(trx_sys->mysql_trx_list);
trx_dummy_sess = sess_open();
@@ -701,7 +703,7 @@ Check for the max file format tag stored on disk. Note: If max_format_id
is == UNIV_FORMAT_MAX + 1 then we only print a warning.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_sys_file_format_max_check(
/*==========================*/
ulint max_format_id) /*!< in: max format id to check */
@@ -718,21 +720,18 @@ trx_sys_file_format_max_check(
format_id = UNIV_FORMAT_MIN;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: highest supported file format is %s.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Highest supported file format is %s.",
trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
if (format_id > UNIV_FORMAT_MAX) {
ut_a(format_id < FILE_FORMAT_NAME_N);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s: the system tablespace is in a file "
- "format that this version doesn't support - %s\n",
- ((max_format_id <= UNIV_FORMAT_MAX)
- ? "Error" : "Warning"),
+ ib_logf(max_format_id <= UNIV_FORMAT_MAX
+ ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
+ "The system tablespace is in a file "
+ "format that this version doesn't support - %s.",
trx_sys_file_format_id_to_name(format_id));
if (max_format_id <= UNIV_FORMAT_MAX) {
@@ -883,7 +882,7 @@ trx_sys_create_rsegs(
ut_a(n_spaces < TRX_SYS_N_RSEGS);
ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
- if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
return(ULINT_UNDEFINED);
}
@@ -926,9 +925,8 @@ trx_sys_create_rsegs(
}
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %lu rollback segment(s) are active.\n",
- n_used);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%lu rollback segment(s) are active.", n_used);
return(n_used);
}
@@ -1000,7 +998,7 @@ trx_sys_read_file_format_id(
);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1019,7 +1017,7 @@ trx_sys_read_file_format_id(
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1080,7 +1078,7 @@ trx_sys_read_pertable_file_format_id(
);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1099,7 +1097,7 @@ trx_sys_read_pertable_file_format_id(
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1120,11 +1118,11 @@ trx_sys_read_pertable_file_format_id(
if (flags == 0) {
/* file format is Antelope */
*format_id = 0;
- return (TRUE);
+ return(TRUE);
} else if (flags & 1) {
/* tablespace flags are ok */
*format_id = (flags / 32) % 128;
- return (TRUE);
+ return(TRUE);
} else {
/* bad tablespace flags */
return(FALSE);
@@ -1143,7 +1141,7 @@ trx_sys_file_format_id_to_name(
{
if (!(id < FILE_FORMAT_NAME_N)) {
/* unknown id */
- return ("Unknown");
+ return("Unknown");
}
return(file_format_name_map[id]);
@@ -1252,7 +1250,7 @@ trx_sys_any_active_transactions(void)
mutex_enter(&trx_sys->mutex);
total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
- + trx_sys->n_mysql_trx;
+ + UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
ut_a(total_trx >= trx_sys->n_prepared_trx);
total_trx -= trx_sys->n_prepared_trx;
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 80ebe0df2b3..449b970842a 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,6 +105,7 @@ trx_create(void)
trx->state = TRX_STATE_NOT_STARTED;
+ trx->active_commit_ordered = 0;
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
trx->no = IB_ULONGLONG_MAX;
@@ -146,10 +147,6 @@ trx_create(void)
trx->lock.table_locks = ib_vector_create(
heap_alloc, sizeof(void**), 32);
- /* For non-locking selects we avoid calling ut_time() too frequently.
- Set the time here for new transactions. */
- trx->start_time = ut_time();
-
return(trx);
}
@@ -184,8 +181,6 @@ trx_allocate_for_mysql(void)
mutex_enter(&trx_sys->mutex);
- trx_sys->n_mysql_trx++;
-
ut_d(trx->in_mysql_trx_list = TRUE);
UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
@@ -205,6 +200,7 @@ trx_free(
ut_a(trx->magic_n == TRX_MAGIC_N);
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
mutex_free(&trx->undo_mutex);
@@ -233,8 +229,10 @@ trx_free(
/* We allocated a dedicated heap for the vector. */
ib_vector_free(trx->autoinc_locks);
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->lock.table_locks);
+ if (trx->lock.table_locks != NULL) {
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->lock.table_locks);
+ }
mutex_free(&trx->mutex);
@@ -249,11 +247,12 @@ trx_free_for_background(
/*====================*/
trx_t* trx) /*!< in, own: trx object */
{
- if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: Freeing a trx which is declared"
- " to be processing\n"
- "InnoDB: inside InnoDB.\n", stderr);
+ if (trx->declared_to_be_inside_innodb) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
+ "to be processing inside InnoDB", trx, trx->id);
+
trx_print(stderr, trx, 600);
putc('\n', stderr);
@@ -262,16 +261,16 @@ trx_free_for_background(
srv_conc_force_exit_innodb(trx);
}
- if (UNIV_UNLIKELY(trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0)) {
+ if (trx->n_mysql_tables_in_use != 0
+ || trx->mysql_n_tables_locked != 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: MySQL is freeing a thd\n"
- "InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
- "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "MySQL is freeing a thd though "
+ "trx->n_mysql_tables_in_use is %lu and "
+ "trx->mysql_n_tables_locked is %lu.",
(ulong) trx->n_mysql_tables_in_use,
(ulong) trx->mysql_n_tables_locked);
+
trx_print(stderr, trx, 600);
ut_print_buf(stderr, trx, sizeof(trx_t));
putc('\n', stderr);
@@ -326,8 +325,6 @@ trx_free_for_mysql(
ut_ad(trx_sys_validate_trx_list());
- trx_sys->n_mysql_trx--;
-
mutex_exit(&trx_sys->mutex);
trx_free_for_background(trx);
@@ -348,6 +345,9 @@ trx_list_rw_insert_ordered(
ut_ad(!trx->read_only);
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
ut_a(srv_is_being_started);
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
@@ -372,6 +372,7 @@ trx_list_rw_insert_ordered(
if (trx2 == NULL) {
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
} else {
UT_LIST_INSERT_AFTER(
trx_list, trx_sys->rw_trx_list, trx2, trx);
@@ -423,6 +424,7 @@ trx_resurrect_insert(
trx->state = TRX_STATE_PREPARED;
trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
} else {
fprintf(stderr,
"InnoDB: Since innodb_force_recovery"
@@ -483,6 +485,7 @@ trx_resurrect_update_in_prepared_state(
if (srv_force_recovery == 0) {
if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
} else {
ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
}
@@ -620,10 +623,10 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
@return assigned rollback segment instance */
-UNIV_INLINE
+static
trx_rseg_t*
-trx_assign_rseg(
-/*============*/
+trx_assign_rseg_low(
+/*================*/
ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
ulint n_tablespaces) /*!< in: number of rollback tablespaces */
{
@@ -631,7 +634,7 @@ trx_assign_rseg(
trx_rseg_t* rseg;
static ulint latest_rseg = 0;
- if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
ut_a(max_undo_logs == ULONG_UNDEFINED);
return(NULL);
}
@@ -668,6 +671,24 @@ trx_assign_rseg(
}
/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx) /*!< A read-only transaction that
+ needs to be assigned a RBS. */
+{
+ ut_a(trx->rseg == 0);
+ ut_a(trx->read_only);
+ ut_a(!srv_read_only_mode);
+ ut_a(!trx_is_autocommit_non_locking(trx));
+
+ trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
+}
+
+/****************************************************************//**
Starts a transaction. */
static
void
@@ -675,10 +696,10 @@ trx_start_low(
/*==========*/
trx_t* trx) /*!< in: transaction */
{
- static ulint n_start_times;
-
ut_ad(trx->rseg == NULL);
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
ut_ad(!trx->is_recovered);
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
@@ -686,7 +707,9 @@ trx_start_low(
/* Check whether it is an AUTOCOMMIT SELECT */
trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);
- trx->read_only = thd_trx_is_read_only(trx->mysql_thd);
+ trx->read_only =
+ (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
+ || srv_read_only_mode;
if (!trx->auto_commit) {
++trx->will_lock;
@@ -695,16 +718,10 @@ trx_start_low(
}
if (!trx->read_only) {
- trx->rseg = trx_assign_rseg(
+ trx->rseg = trx_assign_rseg_low(
srv_undo_logs, srv_undo_tablespaces);
}
- /* Avoid making an unnecessary system call, for non-locking
- auto-commit selects we reuse the start_time for every 32 starts. */
- if (!trx_is_autocommit_non_locking(trx) || !(n_start_times++ % 32)) {
- trx->start_time = ut_time();
- }
-
/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
read_view_open_now: */
@@ -745,12 +762,15 @@ trx_start_low(
ut_ad(!trx_is_autocommit_non_locking(trx));
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
ut_d(trx->in_rw_trx_list = TRUE);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
}
ut_ad(trx_sys_validate_trx_list());
mutex_exit(&trx_sys->mutex);
+ trx->start_time = ut_time();
+
MONITOR_INC(MONITOR_TRX_ACTIVE);
}
@@ -971,6 +991,52 @@ trx_finalize_for_fts(
trx->fts_trx = NULL;
}
+/**********************************************************************//**
+If required, flushes the log to disk based on the value of
+innodb_flush_log_at_trx_commit. */
+static
+void
+trx_flush_log_if_needed_low(
+/*========================*/
+ lsn_t lsn) /*!< in: lsn up to which logs are to be
+ flushed. */
+{
+ switch (srv_flush_log_at_trx_commit) {
+ case 0:
+ /* Do nothing */
+ break;
+ case 1:
+ case 3:
+ /* Write the log and optionally flush it to disk */
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+ srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
+ break;
+ case 2:
+ /* Write the log but do not flush it to disk */
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+
+ break;
+ default:
+ ut_error;
+ }
+}
+
+/**********************************************************************//**
+If required, flushes the log to disk based on the value of
+innodb_flush_log_at_trx_commit. */
+static __attribute__((nonnull))
+void
+trx_flush_log_if_needed(
+/*====================*/
+ lsn_t lsn, /*!< in: lsn up to which logs are to be
+ flushed. */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ trx->op_info = "flushing log";
+ trx_flush_log_if_needed_low(lsn);
+ trx->op_info = "";
+}
+
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
@@ -987,7 +1053,7 @@ trx_commit(
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
/* undo_no is non-zero if we're doing the final commit. */
- if (trx->fts_trx && (trx->undo_no != 0)) {
+ if (trx->fts_trx && trx->undo_no != 0) {
ulint error;
ut_a(!trx_is_autocommit_non_locking(trx));
@@ -1043,6 +1109,8 @@ trx_commit(
trx->state = TRX_STATE_NOT_STARTED;
+ read_view_remove(trx->global_read_view, false);
+
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
} else {
lock_trx_release_locks(trx);
@@ -1057,7 +1125,6 @@ trx_commit(
assert_trx_in_list(trx);
if (trx->read_only) {
- ut_ad(trx->rseg == NULL);
UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
ut_d(trx->in_ro_trx_list = FALSE);
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
@@ -1075,13 +1142,16 @@ trx_commit(
trx->state = TRX_STATE_NOT_STARTED;
+ /* We already own the trx_sys_t::mutex, by doing it here we
+ avoid a potential context switch later. */
+ read_view_remove(trx->global_read_view, true);
+
ut_ad(trx_sys_validate_trx_list());
mutex_exit(&trx_sys->mutex);
}
if (trx->global_read_view != NULL) {
- read_view_remove(trx->global_read_view);
mem_heap_empty(trx->global_read_view_heap);
@@ -1129,26 +1199,8 @@ trx_commit(
trx->must_flush_log_later = TRUE;
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 ||
- srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
} else {
- ut_error;
+ trx_flush_log_if_needed(lsn, trx);
}
trx->commit_lsn = lsn;
@@ -1162,6 +1214,14 @@ trx_commit(
trx->undo_no = 0;
trx->last_sql_stat_start.least_undo_no = 0;
+ trx->ddl = false;
+#ifdef UNIV_DEBUG
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
+ trx->start_file = 0;
+ trx->start_line = 0;
+#endif /* UNIV_DEBUG */
+
trx->will_lock = 0;
trx->read_only = FALSE;
trx->auto_commit = FALSE;
@@ -1175,6 +1235,8 @@ trx_commit(
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
+ trx->dict_operation = TRX_DICT_OP_NONE;
+
trx->error_state = DB_SUCCESS;
/* trx->in_mysql_trx_list would hold between
@@ -1365,7 +1427,7 @@ trx_commit_step(
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
trx_commit_for_mysql(
/*=================*/
trx_t* trx) /*!< in/out: transaction */
@@ -1389,6 +1451,9 @@ trx_commit_for_mysql(
records, generated by the same transaction do not. */
trx->support_xa = thd_supports_xa(trx->mysql_thd);
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
trx_start_low(trx);
/* fall through */
case TRX_STATE_ACTIVE:
@@ -1407,53 +1472,23 @@ trx_commit_for_mysql(
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
+with trx->flush_log_later == TRUE. */
UNIV_INTERN
-ulint
+void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx) /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
{
- lsn_t lsn = trx->commit_lsn;
-
ut_a(trx);
- trx->op_info = "flushing log";
-
- if (!trx->must_flush_log_later) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
- /* Do nothing - we already flushed the prepare and binlog write
- to disk, so transaction is durable (will be recovered from
- binlog if necessary) */
- } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- /* Write the log to the log files AND flush them to
- disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
+ if (!trx->must_flush_log_later
+ || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
+ return;
}
- trx->must_flush_log_later = FALSE;
-
- trx->op_info = "";
+ trx_flush_log_if_needed(trx->commit_lsn, trx);
- return(0);
+ trx->must_flush_log_later = FALSE;
}
/**********************************************************************//**
@@ -1500,9 +1535,9 @@ trx_print_low(
ulint max_query_len,
/*!< in: max query length to print,
or 0 to use the default max length */
- ulint n_lock_rec,
+ ulint n_rec_locks,
/*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_lock_struct,
+ ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
ulint heap_size)
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
@@ -1581,14 +1616,14 @@ state_ok:
fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
}
- if (n_lock_struct > 0 || heap_size > 400) {
+ if (n_trx_locks > 0 || heap_size > 400) {
newline = TRUE;
fprintf(f, "%lu lock struct(s), heap size %lu,"
" %lu row lock(s)",
- (ulong) n_lock_struct,
+ (ulong) n_trx_locks,
(ulong) heap_size,
- (ulong) n_lock_rec);
+ (ulong) n_rec_locks);
}
if (trx->has_search_latch) {
@@ -1644,19 +1679,19 @@ trx_print(
ulint max_query_len) /*!< in: max query length to print,
or 0 to use the default max length */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
+ ulint n_rec_locks;
+ ulint n_trx_locks;
ulint heap_size;
lock_mutex_enter();
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
mutex_enter(&trx_sys->mutex);
trx_print_low(f, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
mutex_exit(&trx_sys->mutex);
}
@@ -1684,7 +1719,6 @@ trx_assert_started(
switch (trx->state) {
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
@@ -1826,28 +1860,7 @@ trx_prepare(
TODO: find out if MySQL holds some mutex when calling this.
That would spoil our group prepare algorithm. */
- if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
+ trx_flush_log_if_needed(lsn, trx);
}
}
@@ -1859,7 +1872,7 @@ trx_prepare_for_mysql(
/*==================*/
trx_t* trx) /*!< in/out: trx handle */
{
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa_low(trx);
trx->op_info = "preparing";
@@ -1935,12 +1948,12 @@ trx_recover_for_mysql(
if (count > 0){
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: %lu transactions in prepared state"
+ " InnoDB: %d transactions in prepared state"
" after recovery\n",
- (ulong) count);
+ int (count));
}
- return ((int) count);
+ return(int (count));
}
/*******************************************************************//**
@@ -2023,8 +2036,8 @@ trx_get_trx_by_xid(
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started_xa(
-/*========================*/
+trx_start_if_not_started_xa_low(
+/*============================*/
trx_t* trx) /*!< in: transaction */
{
switch (trx->state) {
@@ -2057,8 +2070,8 @@ trx_start_if_not_started_xa(
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started(
-/*=====================*/
+trx_start_if_not_started_low(
+/*=========================*/
trx_t* trx) /*!< in: transaction */
{
switch (trx->state) {
@@ -2074,3 +2087,45 @@ trx_start_if_not_started(
ut_error;
}
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_dict_op_t op) /*!< in: dictionary operation type */
+{
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ /* Flag this transaction as a dictionary operation, so that
+ the data dictionary will be locked in crash recovery. */
+
+ trx_set_dict_operation(trx, op);
+
+ /* Ensure it is not flagged as an auto-commit-non-locking
+ transation. */
+ trx->will_lock = 1;
+
+ trx->ddl = true;
+
+ trx_start_low(trx);
+ return;
+
+ case TRX_STATE_ACTIVE:
+ /* We have this start if not started idiom, therefore we
+ can't add stronger checks here. */
+ trx->ddl = true;
+
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ ut_ad(trx->will_lock > 0);
+ return;
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ break;
+ }
+
+ ut_error;
+}
+
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 13ad2bb3755..c4480b11366 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -413,8 +413,8 @@ trx_undo_page_init(
Creates a new undo log segment in file.
@return DB_SUCCESS if page creation OK possible error codes are:
DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_seg_create(
/*================*/
trx_rseg_t* rseg __attribute__((unused)),/*!< in: rollback segment */
@@ -435,7 +435,7 @@ trx_undo_seg_create(
trx_usegf_t* seg_hdr;
ulint n_reserved;
ibool success;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(mtr && id && rseg_hdr);
ut_ad(mutex_own(&(rseg->mutex)));
@@ -1468,7 +1468,7 @@ trx_undo_mem_create(
if (undo == NULL) {
- return NULL;
+ return(NULL);
}
undo->id = id;
@@ -1551,8 +1551,8 @@ Creates a new undo log.
@return DB_SUCCESS if successful in creating the new undo lob object,
possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_create(
/*============*/
trx_t* trx, /*!< in: transaction */
@@ -1571,7 +1571,7 @@ trx_undo_create(
ulint offset;
ulint id;
page_t* undo_page;
- ulint err;
+ dberr_t err;
ut_ad(mutex_own(&(rseg->mutex)));
@@ -1746,7 +1746,7 @@ undo log reused.
are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -1755,7 +1755,7 @@ trx_undo_assign_undo(
trx_rseg_t* rseg;
trx_undo_t* undo;
mtr_t mtr;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(trx);
@@ -1771,11 +1771,17 @@ trx_undo_assign_undo(
mutex_enter(&rseg->mutex);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_too_many_trx",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ goto func_exit;
+ );
+
undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
&mtr);
if (undo == NULL) {
err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
+ &undo, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1800,7 +1806,7 @@ func_exit:
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
- return err;
+ return(err);
}
/******************************************************************//**
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 538879dd9e2..695035d6ae8 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -79,11 +79,11 @@ mysys/my_perf.c, contributed by Facebook under the following license.
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
-#include <string.h> /* memcmp() */
-
#include "univ.i"
#include "ut0crc32.h"
+#include <string.h>
+
ib_ut_crc32_t ut_crc32;
/* Precalculated table used to generate the CRC32 if the CPU does not
@@ -92,7 +92,7 @@ static ib_uint32_t ut_crc32_slice8_table[8][256];
static ibool ut_crc32_slice8_table_initialized = FALSE;
/* Flag that tells whether the CPU supports CRC32 or not */
-static ibool ut_crc32_sse2_enabled = FALSE;
+UNIV_INTERN bool ut_crc32_sse2_enabled = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
@@ -315,8 +315,4 @@ ut_crc32_init()
ut_crc32_slice8_table_init();
ut_crc32 = ut_crc32_slice8;
}
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: CPU %s crc32 instructions\n",
- ut_crc32_sse2_enabled ? "supports" : "does not support");
}
diff --git a/storage/innobase/ut/ut0mem.cc b/storage/innobase/ut/ut0mem.cc
index 42ad180d373..2bb5d9ce332 100644
--- a/storage/innobase/ut/ut0mem.cc
+++ b/storage/innobase/ut/ut0mem.cc
@@ -35,9 +35,6 @@ Created 5/11/1994 Heikki Tuuri
#include <stdlib.h>
-/** This struct is placed first in every allocated memory block */
-typedef struct ut_mem_block_struct ut_mem_block_t;
-
/** The total amount of memory currently allocated from the operating
system with os_mem_alloc_large() or malloc(). Does not count malloc()
if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
@@ -52,14 +49,14 @@ UNIV_INTERN mysql_pfs_key_t ut_list_mutex_key;
#endif
/** Dynamically allocated memory block */
-struct ut_mem_block_struct{
+struct ut_mem_block_t{
UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
/*!< mem block list node */
ulint size; /*!< size of allocated memory */
ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
};
-/** The value of ut_mem_block_struct::magic_n. Used in detecting
+/** The value of ut_mem_block_t::magic_n. Used in detecting
memory corruption. */
#define UT_MEM_MAGIC_N 1601650166
diff --git a/storage/innobase/ut/ut0rbt.cc b/storage/innobase/ut/ut0rbt.cc
index b21543a679d..e93844af600 100644
--- a/storage/innobase/ut/ut0rbt.cc
+++ b/storage/innobase/ut/ut0rbt.cc
@@ -773,7 +773,7 @@ rbt_create_arg_cmp(
size_t sizeof_value, /*!< in: sizeof data item */
ib_rbt_arg_compare
compare, /*!< in: fn to compare items */
- const void* cmp_arg) /*!< in: compare fn arg */
+ void* cmp_arg) /*!< in: compare fn arg */
{
ib_rbt_t* tree;
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 2268cfd2493..3c94d96c3ac 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 5/11/1994 Heikki Tuuri
#ifndef UNIV_INNOCHECKSUM
#include "ut0sort.h"
+#include "os0thread.h" /* thread-ID */
#ifdef UNIV_NONINL
#include "ut0ut.ic"
@@ -218,18 +219,25 @@ ut_print_timestamp(
/*===============*/
FILE* file) /*!< in: file where to print */
{
+ ulint thread_id = 0;
+
+#ifndef UNIV_INNOCHECKSUM
+ thread_id = os_thread_pf(os_thread_get_curr_id());
+#endif
+
#ifdef __WIN__
SYSTEMTIME cal_tm;
GetLocalTime(&cal_tm);
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- (int) cal_tm.wYear % 100,
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ (int) cal_tm.wYear,
(int) cal_tm.wMonth,
(int) cal_tm.wDay,
(int) cal_tm.wHour,
(int) cal_tm.wMinute,
- (int) cal_tm.wSecond);
+ (int) cal_tm.wSecond,
+ thread_id);
#else
struct tm* cal_tm_ptr;
time_t tm;
@@ -243,13 +251,14 @@ ut_print_timestamp(
time(&tm);
cal_tm_ptr = localtime(&tm);
#endif
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ cal_tm_ptr->tm_year + 1900,
cal_tm_ptr->tm_mon + 1,
cal_tm_ptr->tm_mday,
cal_tm_ptr->tm_hour,
cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
+ cal_tm_ptr->tm_sec,
+ thread_id);
#endif
}
@@ -515,7 +524,7 @@ void
ut_print_name(
/*==========*/
FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name) /*!< in: name to print */
@@ -533,7 +542,7 @@ void
ut_print_namel(
/*===========*/
FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction (NULL=no quotes) */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name, /*!< in: name to print */
@@ -553,6 +562,50 @@ ut_print_namel(
}
/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+ const char* name, /*!< in: table or index name, must be
+ '\0'-terminated */
+ ibool is_table, /*!< in: if TRUE then 'name' is a table
+ name */
+ char* formatted, /*!< out: formatted result, will be
+ '\0'-terminated */
+ ulint formatted_size) /*!< out: no more than this number of
+ bytes will be written to 'formatted' */
+{
+ switch (formatted_size) {
+ case 1:
+ formatted[0] = '\0';
+ /* FALL-THROUGH */
+ case 0:
+ return(formatted);
+ }
+
+ char* end;
+
+ end = innobase_convert_name(formatted, formatted_size,
+ name, strlen(name), NULL, is_table);
+
+ /* If the space in 'formatted' was completely used, then sacrifice
+ the last character in order to write '\0' at the end. */
+ if ((ulint) (end - formatted) == formatted_size) {
+ end--;
+ }
+
+ ut_a((ulint) (end - formatted) < formatted_size);
+
+ *end = '\0';
+
+ return(formatted);
+}
+
+/**********************************************************************//**
Catenate files. */
UNIV_INTERN
void
@@ -648,7 +701,7 @@ UNIV_INTERN
const char*
ut_strerr(
/*======*/
- enum db_err num) /*!< in: error number */
+ dberr_t num) /*!< in: error number */
{
switch (num) {
case DB_SUCCESS:
@@ -703,10 +756,12 @@ ut_strerr(
return("Cannot drop constraint");
case DB_NO_SAVEPOINT:
return("No such savepoint");
- case DB_TABLESPACE_ALREADY_EXISTS:
+ case DB_TABLESPACE_EXISTS:
return("Tablespace already exists");
case DB_TABLESPACE_DELETED:
- return("No such tablespace");
+ return("Tablespace deleted or being deleted");
+ case DB_TABLESPACE_NOT_FOUND:
+ return("Tablespace not found");
case DB_LOCK_TABLE_FULL:
return("Lock structs have exhausted the buffer pool");
case DB_FOREIGN_DUPLICATE_KEY:
@@ -717,8 +772,8 @@ ut_strerr(
return("Too many concurrent transactions");
case DB_UNSUPPORTED:
return("Unsupported");
- case DB_PRIMARY_KEY_IS_NULL:
- return("Primary key is NULL");
+ case DB_INVALID_NULL:
+ return("NULL value encountered in NOT NULL column");
case DB_STATS_DO_NOT_EXIST:
return("Persistent statistics do not exist");
case DB_FAIL:
@@ -745,6 +800,21 @@ ut_strerr(
return("Undo record too big");
case DB_END_OF_INDEX:
return("End of index");
+ case DB_IO_ERROR:
+ return("I/O error");
+ case DB_TABLE_IN_FK_CHECK:
+ return("Table is being used in foreign key check");
+ case DB_DATA_MISMATCH:
+ return("data mismatch");
+ case DB_SCHEMA_NOT_LOCKED:
+ return("schema not locked");
+ case DB_NOT_FOUND:
+ return("not found");
+ case DB_ONLINE_LOG_TOO_BIG:
+ return("Log size exceeded during online index creation");
+ case DB_DICT_CHANGED:
+ return("Table dictionary has changed");
+
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */
}
diff --git a/storage/innobase/ut/ut0vec.cc b/storage/innobase/ut/ut0vec.cc
index 8ac5d9dc5d3..5842d9f1c0e 100644
--- a/storage/innobase/ut/ut0vec.cc
+++ b/storage/innobase/ut/ut0vec.cc
@@ -44,12 +44,14 @@ ib_vector_create(
ut_a(size > 0);
- vec = static_cast<ib_vector_t*>(allocator->mem_malloc(allocator, sizeof(*vec)));
+ vec = static_cast<ib_vector_t*>(
+ allocator->mem_malloc(allocator, sizeof(*vec)));
vec->used = 0;
vec->total = size;
vec->allocator = allocator;
vec->sizeof_value = sizeof_value;
+
vec->data = static_cast<void*>(
allocator->mem_malloc(allocator, vec->sizeof_value * size));
diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc
index 6d410524fe7..d1ba36b3b00 100644
--- a/storage/innobase/ut/ut0wqueue.cc
+++ b/storage/innobase/ut/ut0wqueue.cc
@@ -40,7 +40,7 @@ ib_wqueue_create(void)
mutex_create(PFS_NOT_INSTRUMENTED, &wq->mutex, SYNC_WORK_QUEUE);
wq->items = ib_list_create();
- wq->event = os_event_create(NULL);
+ wq->event = os_event_create();
return(wq);
}
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 95f37ddb12f..092e1a8a79e 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -512,7 +512,7 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
keydef[i].block_length= pos->block_size;
keydef[i].seg= keyseg;
keydef[i].keysegs= pos->key_parts;
- for (j= 0; j < pos->key_parts; j++)
+ for (j= 0; j < pos->user_defined_key_parts; j++)
{
Field *field= pos->key_part[j].field;
type= field->key_type();
@@ -574,7 +574,7 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
(uchar*) table_arg->record[0]);
}
}
- keyseg+= pos->key_parts;
+ keyseg+= pos->user_defined_key_parts;
}
if (table_arg->found_next_number_field)
keydef[share->next_number_index].flag|= HA_AUTO_KEY;
@@ -1042,7 +1042,7 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
double ha_maria::scan_time()
{
if (file->s->data_file_type == BLOCK_RECORD)
- return ulonglong2double(stats.data_file_length - file->s->block_size) / max(file->s->block_size / 2, IO_SIZE) + 2;
+ return ulonglong2double(stats.data_file_length - file->s->block_size) / MY_MAX(file->s->block_size / 2, IO_SIZE) + 2;
return handler::scan_time();
}
@@ -2464,18 +2464,18 @@ int ha_maria::info(uint flag)
ref_length= maria_info.reflength;
share->db_options_in_use= maria_info.options;
stats.block_size= maria_block_size;
- stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = max(sizeof(void *))
+ stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
/* Update share */
share->keys_in_use.set_prefix(share->keys);
share->keys_in_use.intersect_extended(maria_info.key_map);
share->keys_for_keyread.intersect(share->keys_in_use);
share->db_record_offset= maria_info.record_offset;
- if (share->key_parts)
+ if (share->user_defined_key_parts)
{
ulong *to= table->key_info[0].rec_per_key, *end;
double *from= maria_info.rec_per_key;
- for (end= to+ share->key_parts ; to < end ; to++, from++)
+ for (end= to+ share->user_defined_key_parts ; to < end ; to++, from++)
*to= (ulong) (*from + 0.5);
}
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index e3668d3c8d3..a351447cce3 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -319,7 +319,11 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file,
my_bool _ma_bitmap_end(MARIA_SHARE *share)
{
my_bool res;
- mysql_mutex_assert_owner(&share->close_lock);
+
+#ifndef DBUG_OFF
+ if (! share->internal_table)
+ mysql_mutex_assert_owner(&share->close_lock);
+#endif
DBUG_ASSERT(share->bitmap.non_flushable == 0);
DBUG_ASSERT(share->bitmap.flush_all_requested == 0);
DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 &&
@@ -1393,7 +1397,7 @@ found:
IMPLEMENTATION
We will return the smallest area >= size. If there is no such
block, we will return the biggest area that satisfies
- area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
+ area_size >= MY_MIN(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
To speed up searches, we will only consider areas that has at least 16 free
pages starting on an even boundary. When finding such an area, we will
@@ -1501,7 +1505,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
DBUG_RETURN(0); /* No room on page */
/*
- Now allocate min(pages_needed, area_size), starting from
+ Now allocate MY_MIN(pages_needed, area_size), starting from
best_start + best_prefix_area_size
*/
if (best_area_size > pages_needed)
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index 55b9a137050..2fc30b880b4 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -1230,7 +1230,7 @@ static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size,
}
check_directory(buff, block_size,
- info ? min(info->s->base.min_block_length, length) : 0,
+ info ? MY_MIN(info->s->base.min_block_length, length) : 0,
*empty_space);
DBUG_RETURN(0);
}
@@ -2126,7 +2126,7 @@ static my_bool write_full_pages(MARIA_HA *info,
}
lsn_store(buff, lsn);
buff[PAGE_TYPE_OFFSET]= (uchar) BLOB_PAGE;
- copy_length= min(data_size, length);
+ copy_length= MY_MIN(data_size, length);
memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length);
length-= copy_length;
@@ -3504,7 +3504,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
/* page will be pinned & locked by get_head_or_tail_page */
if (get_head_or_tail_page(info, blocks->block, info->buff,
- max(row->space_on_head_page,
+ MY_MAX(row->space_on_head_page,
info->s->base.min_block_length),
HEAD_PAGE,
PAGECACHE_LOCK_WRITE, &row_pos))
@@ -3952,7 +3952,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
*/
DBUG_ASSERT(blocks->count > 1 ||
- max(new_row->total_length, share->base.min_block_length) <=
+ MY_MAX(new_row->total_length, share->base.min_block_length) <=
length_on_head_page);
/* Store same amount of data on head page as on original page */
diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c
index 829189baeed..35926d37e03 100644
--- a/storage/maria/ma_cache.c
+++ b/storage/maria/ma_cache.c
@@ -61,7 +61,7 @@ my_bool _ma_read_cache(MARIA_HA *handler, IO_CACHE *info, uchar *buff,
(my_off_t) (info->read_end - info->request_pos))
{
in_buff_pos=info->request_pos+(uint) offset;
- in_buff_length= min(length,(size_t) (info->read_end-in_buff_pos));
+ in_buff_length= MY_MIN(length,(size_t) (info->read_end-in_buff_pos));
memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
if (!(length-=in_buff_length))
DBUG_RETURN(0);
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index ab9080c40fb..e6907aabe27 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -2396,7 +2396,7 @@ static int initialize_variables_for_repair(HA_CHECK *param,
else
{
ulong rec_length;
- rec_length= max(share->base.min_pack_length,
+ rec_length= MY_MAX(share->base.min_pack_length,
share->base.min_block_length);
sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
}
@@ -3600,7 +3600,7 @@ int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
ulong buff_length;
DBUG_ENTER("maria_filecopy");
- buff_length=(ulong) min(param->write_buffer_length,length);
+ buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
if (!(buff=my_malloc(buff_length,MYF(0))))
{
buff=tmp_buff; buff_length=IO_SIZE;
@@ -5658,7 +5658,7 @@ word_init_ft_buf:
ft_buf->buf=ft_buf->lastkey+a_len;
/*
32 is just a safety margin here
- (at least max(val_len, sizeof(nod_flag)) should be there).
+ (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
May be better performance could be achieved if we'd put
(sort_info->keyinfo->block_length-32)/XXX
instead.
@@ -6071,7 +6071,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
maria_close(*org_info);
bzero((char*) &create_info,sizeof(create_info));
- create_info.max_rows=max(max_records,share.base.records);
+ create_info.max_rows=MY_MAX(max_records,share.base.records);
create_info.reloc_rows=share.base.reloc;
create_info.old_options=(share.options |
(unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
@@ -6494,7 +6494,8 @@ static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
DBUG_ENTER("create_new_data_handle");
if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
- HA_OPEN_COPY | HA_OPEN_FOR_REPAIR)))
+ HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
+ HA_OPEN_INTERNAL_TABLE)))
DBUG_RETURN(1);
new_info= sort_info->new_info;
@@ -6915,7 +6916,7 @@ static TrID max_trid_in_system(void)
{
TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
/* 'id' may be far bigger, if last shutdown is old */
- return max(id, max_trid_in_control_file);
+ return MY_MAX(id, max_trid_in_control_file);
}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 304216a76d9..51494300172 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -563,7 +563,7 @@ pthread_handler_t ma_checkpoint_background(void *arg)
DBUG_ASSERT(interval > 0);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_user_host)(0,0,0,0);
+ PSI_THREAD_CALL(set_thread_user_host)(0,0,0,0);
#endif
/*
@@ -861,11 +861,11 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
my_malloc(STATE_COPIES * sizeof(struct st_state_copy), MYF(MY_WME));
dfiles= (PAGECACHE_FILE *)my_realloc((uchar *)dfiles,
/* avoid size of 0 for my_realloc */
- max(1, nb) * sizeof(PAGECACHE_FILE),
+ MY_MAX(1, nb) * sizeof(PAGECACHE_FILE),
MYF(MY_WME | MY_ALLOW_ZERO_PTR));
kfiles= (PAGECACHE_FILE *)my_realloc((uchar *)kfiles,
/* avoid size of 0 for my_realloc */
- max(1, nb) * sizeof(PAGECACHE_FILE),
+ MY_MAX(1, nb) * sizeof(PAGECACHE_FILE),
MYF(MY_WME | MY_ALLOW_ZERO_PTR));
if (unlikely((state_copies == NULL) ||
(dfiles == NULL) || (kfiles == NULL)))
@@ -898,7 +898,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
Collect and cache a bunch of states. We do this for many states at a
time, to not lock/unlock the log's lock too often.
*/
- uint j, bound= min(nb, i + STATE_COPIES);
+ uint j, bound= MY_MIN(nb, i + STATE_COPIES);
state_copy= state_copies;
/* part of the state is protected by log's lock */
translog_lock();
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index c355f1f1def..dd3a034425a 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -27,6 +27,7 @@ int maria_close(register MARIA_HA *info)
int error=0,flag;
my_bool share_can_be_freed= FALSE;
MARIA_SHARE *share= info->s;
+ my_bool internal_table= share->internal_table;
DBUG_ENTER("maria_close");
DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u",
share->open_file_name.str,
@@ -49,9 +50,9 @@ int maria_close(register MARIA_HA *info)
error= my_errno;
}
-
/* Ensure no one can open this file while we are closing it */
- mysql_mutex_lock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
if (info->lock_type == F_EXTRA_LCK)
info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
@@ -60,8 +61,11 @@ int maria_close(register MARIA_HA *info)
if (maria_lock_database(info,F_UNLCK))
error=my_errno;
}
- mysql_mutex_lock(&share->close_lock);
- mysql_mutex_lock(&share->intern_lock);
+ if (!internal_table)
+ {
+ mysql_mutex_lock(&share->close_lock);
+ mysql_mutex_lock(&share->intern_lock);
+ }
if (share->options & HA_OPTION_READ_ONLY_DATA)
{
@@ -75,7 +79,8 @@ int maria_close(register MARIA_HA *info)
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
}
flag= !--share->reopen;
- maria_open_list=list_delete(maria_open_list,&info->open_list);
+ if (!internal_table)
+ maria_open_list=list_delete(maria_open_list,&info->open_list);
my_free(info->rec_buff);
(*share->end)(info);
@@ -159,7 +164,8 @@ int maria_close(register MARIA_HA *info)
error= my_errno;
}
thr_lock_delete(&share->lock);
- (void) mysql_mutex_destroy(&share->key_del_lock);
+ mysql_mutex_destroy(&share->key_del_lock);
+
{
int i,keys;
keys = share->state.header.keys;
@@ -181,9 +187,11 @@ int maria_close(register MARIA_HA *info)
We have to unlock share->intern_lock then lock it after
LOCK_trn_list (trnman_lock()) to avoid dead locks.
*/
- mysql_mutex_unlock(&share->intern_lock);
+ if (!internal_table)
+ mysql_mutex_unlock(&share->intern_lock);
_ma_remove_not_visible_states_with_lock(share, TRUE);
- mysql_mutex_lock(&share->intern_lock);
+ if (!internal_table)
+ mysql_mutex_lock(&share->intern_lock);
if (share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME)
{
@@ -220,9 +228,12 @@ int maria_close(register MARIA_HA *info)
share->state_history= 0;
}
}
- mysql_mutex_unlock(&THR_LOCK_maria);
- mysql_mutex_unlock(&share->intern_lock);
- mysql_mutex_unlock(&share->close_lock);
+ if (!internal_table)
+ {
+ mysql_mutex_unlock(&THR_LOCK_maria);
+ mysql_mutex_unlock(&share->intern_lock);
+ mysql_mutex_unlock(&share->close_lock);
+ }
if (share_can_be_freed)
{
(void) mysql_mutex_destroy(&share->intern_lock);
diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c
index 70bc668a220..46db3ca4ae5 100644
--- a/storage/maria/ma_commit.c
+++ b/storage/maria/ma_commit.c
@@ -39,11 +39,11 @@ int ma_commit(TRN *trn)
/*
- if COMMIT record is written before trnman_commit_trn():
if Checkpoint comes in the middle it will see trn is not committed,
- then if crash, Recovery might roll back trn (if min(rec_lsn) is after
+ then if crash, Recovery might roll back trn (if MY_MIN(rec_lsn) is after
COMMIT record) and this is not an issue as
* transaction's updates were not made visible to other transactions
* "commit ok" was not sent to client
- Alternatively, Recovery might commit trn (if min(rec_lsn) is before COMMIT
+ Alternatively, Recovery might commit trn (if MY_MIN(rec_lsn) is before COMMIT
record), which is ok too. All in all it means that "trn committed" is not
100% equal to "COMMIT record written".
- if COMMIT record is written after trnman_commit_trn():
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index 28c3491730f..152302a5426 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -51,6 +51,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
base_pos,long_varchar_count,varchar_length,
unique_key_parts,fulltext_keys,offset, not_block_record_extra_length;
uint max_field_lengths, extra_header_size, column_nr;
+ uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr;
ulong pack_reclength;
@@ -713,7 +714,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
got from MAI file header (see also mariapack.c:save_state)
*/
share.base.key_reflength=
- maria_get_pointer_length(max(ci->key_file_length,tmp),3);
+ maria_get_pointer_length(MY_MAX(ci->key_file_length,tmp),3);
share.base.keys= share.state.header.keys= keys;
share.state.header.uniques= uniques;
share.state.header.fulltext_keys= fulltext_keys;
@@ -780,7 +781,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.base.min_block_length=
(share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH &&
! share.base.blobs) ?
- max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
+ MY_MAX(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
MARIA_EXTEND_BLOCK_LENGTH;
}
else if (datafile_type == STATIC_RECORD)
@@ -789,7 +790,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (! (flags & HA_DONT_TOUCH_DATA))
share.state.create_time= time((time_t*) 0);
- mysql_mutex_lock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
/*
NOTE: For test_if_reopen() we need a real path name. Hence we need
@@ -854,7 +856,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
NOTE: The filename is compared against unique_file_name of every
open table. Hence we need a real path here.
*/
- if (_ma_test_if_reopen(filename))
+ if (!internal_table && _ma_test_if_reopen(filename))
{
my_printf_error(HA_ERR_TABLE_EXIST, "Aria table '%s' is in use "
"(most likely by a MERGE table). Try FLUSH TABLES.",
@@ -1171,7 +1173,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (mysql_file_close(dfile,MYF(0)))
goto err;
}
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
res= 0;
my_free((char*) rec_per_key_part);
errpos=0;
@@ -1180,7 +1183,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
DBUG_RETURN(res);
err:
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
err_no_lock:
save_errno=my_errno;
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
index 5b8d0e01677..50edb216a1c 100644
--- a/storage/maria/ma_delete.c
+++ b/storage/maria/ma_delete.c
@@ -987,7 +987,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
*/
if (_ma_log_add(anc_page, anc_length, keypos,
anc_key_inserted.move_length +
- max(anc_key_inserted.changed_length -
+ MY_MAX(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
@@ -1229,7 +1229,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
*/
if (_ma_log_add(anc_page, anc_length, keypos,
anc_key_inserted.move_length +
- max(anc_key_inserted.changed_length -
+ MY_MAX(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
@@ -1570,7 +1570,7 @@ my_bool _ma_log_delete(MARIA_PAGE *ma_page, const uchar *key_pos,
current_size != share->max_index_block_size)
{
/* Append data that didn't fit on the page before */
- uint length= (min(ma_page->size, share->max_index_block_size) -
+ uint length= (MY_MIN(ma_page->size, share->max_index_block_size) -
current_size);
uchar *data= ma_page->buff + current_size;
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
index c1c0a8e9729..4bb51d0dcf3 100644
--- a/storage/maria/ma_dynrec.c
+++ b/storage/maria/ma_dynrec.c
@@ -851,7 +851,7 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
uint tmp=MY_ALIGN(reclength - length + 3 +
test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE);
/* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */
- tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
+ tmp= MY_MIN(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
/* Check if we can extend this block */
if (block_info.filepos + block_info.block_len ==
info->state->data_file_length &&
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index 0847f3c729c..66e7b4033c7 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -105,7 +105,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
cache_size= (extra_arg ? *(ulong*) extra_arg :
my_default_record_cache_size);
if (!(init_io_cache(&info->rec_cache, info->dfile.file,
- (uint) min(share->state.state.data_file_length+1,
+ (uint) MY_MIN(share->state.state.data_file_length+1,
cache_size),
READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
MYF(share->write_flag & MY_WAIT_IF_FULL))))
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
index c98c4b599fc..eb5813f84f1 100644
--- a/storage/maria/ma_ft_boolean_search.c
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -46,9 +46,9 @@
three subexpressions (including the top-level one),
every one has its own max_docid, updated by its plus word.
but for the search word6 uses
- max(word1.max_docid, word3.max_docid, word5.max_docid),
+ MY_MAX(word1.max_docid, word3.max_docid, word5.max_docid),
while word4 uses, accordingly,
- max(word1.max_docid, word3.max_docid).
+ MY_MAX(word1.max_docid, word3.max_docid).
*/
#define FT_CORE
diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c
index 341ea147785..912ed0984a3 100644
--- a/storage/maria/ma_info.c
+++ b/storage/maria/ma_info.c
@@ -31,7 +31,7 @@ MARIA_RECORD_POS maria_position(MARIA_HA *info)
uint maria_max_key_length()
{
uint tmp= (_ma_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
- return min(HA_MAX_KEY_LENGTH, tmp);
+ return MY_MIN(HA_MAX_KEY_LENGTH, tmp);
}
/* Get information about the table */
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
index 502ac2b8809..ae9427981ea 100644
--- a/storage/maria/ma_key_recover.c
+++ b/storage/maria/ma_key_recover.c
@@ -506,7 +506,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
move_length));
DBUG_ASSERT(info->s->now_transactional);
DBUG_ASSERT(move_length <= (int) changed_length);
- DBUG_ASSERT(ma_page->org_size == min(org_page_length, max_page_size));
+ DBUG_ASSERT(ma_page->org_size == MY_MIN(org_page_length, max_page_size));
DBUG_ASSERT(ma_page->size == org_page_length + move_length);
DBUG_ASSERT(offset <= ma_page->org_size);
@@ -618,7 +618,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
DBUG_ASSERT(current_size <= max_page_size && current_size <= ma_page->size);
if (current_size != ma_page->size && current_size != max_page_size)
{
- uint length= min(ma_page->size, max_page_size) - current_size;
+ uint length= MY_MIN(ma_page->size, max_page_size) - current_size;
uchar *data= ma_page->buff + current_size;
log_pos[0]= KEY_OP_ADD_SUFFIX;
@@ -641,7 +641,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
overflow!
*/
ma_page->org_size= current_size;
- DBUG_ASSERT(ma_page->org_size == min(ma_page->size, max_page_size));
+ DBUG_ASSERT(ma_page->org_size == MY_MIN(ma_page->size, max_page_size));
if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
info->trn, info,
@@ -663,7 +663,7 @@ void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array,
uint *translog_parts)
{
MARIA_SHARE *share= ma_page->info->s;
- int page_length= min(ma_page->size, share->max_index_block_size);
+ int page_length= MY_MIN(ma_page->size, share->max_index_block_size);
uint org_length;
ha_checksum crc;
@@ -1111,7 +1111,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
uint2korr(header), uint2korr(header+2)));
DBUG_ASSERT(uint2korr(header) == page_length);
#ifndef DBUG_OFF
- new_page_length= min(uint2korr(header+2), max_page_size);
+ new_page_length= MY_MIN(uint2korr(header+2), max_page_size);
#endif
header+= 4;
break;
@@ -1148,7 +1148,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
from= uint2korr(header);
header+= 2;
/* "from" is a place in the existing page */
- DBUG_ASSERT(max(from, to) < max_page_size);
+ DBUG_ASSERT(MY_MAX(from, to) < max_page_size);
memcpy(buff + to, buff + from, full_length);
}
break;
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index 56926c048d8..2a2681c0844 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -4808,7 +4808,7 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data)
}
#endif
- min_offset= min(buffer_end_offset, file_end_offset);
+ min_offset= MY_MIN(buffer_end_offset, file_end_offset);
/* TODO: check is it ptr or size enough */
log_descriptor.bc.buffer->size+= min_offset;
log_descriptor.bc.ptr+= min_offset;
@@ -6833,7 +6833,7 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset,
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
base_lsn= buff->lsn;
- body_len= min(page_rest, buff->record_length);
+ body_len= MY_MIN(page_rest, buff->record_length);
}
else
{
@@ -7396,7 +7396,7 @@ translog_size_t translog_read_record(LSN lsn,
data->scanner.fixed_horizon));
if (offset < data->read_header)
{
- uint16 len= min(data->read_header, end) - offset;
+ uint16 len= MY_MIN(data->read_header, end) - offset;
DBUG_PRINT("info",
("enter header offset: %lu length: %lu",
(ulong) offset, (ulong) length));
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 88422e3dc5f..0543f426af3 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -78,6 +78,7 @@ MARIA_HA *_ma_test_if_reopen(const char *filename)
mode Mode of table (O_RDONLY | O_RDWR)
data_file Filedescriptor of data file to use < 0 if one should open
open it.
+ internal_table <> 0 if this is an internal temporary table
RETURN
# Maria handler
@@ -86,7 +87,8 @@ MARIA_HA *_ma_test_if_reopen(const char *filename)
static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
- int mode, File data_file)
+ int mode, File data_file,
+ uint internal_table)
{
int save_errno;
uint errpos;
@@ -159,7 +161,7 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
/* The following should be big enough for all pinning purposes */
if (my_init_dynamic_array(&info.pinned_pages,
sizeof(MARIA_PINNED_PAGE),
- max(share->base.blobs*2 + 4,
+ MY_MAX(share->base.blobs*2 + 4,
MARIA_MAX_TREE_LEVELS*3), 16, MYF(0)))
goto err;
@@ -207,9 +209,17 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
if (share->options & HA_OPTION_TMP_TABLE)
m_info->lock.type= TL_WRITE;
- m_info->open_list.data=(void*) m_info;
- maria_open_list=list_add(maria_open_list,&m_info->open_list);
-
+ if (!internal_table)
+ {
+ m_info->open_list.data=(void*) m_info;
+ maria_open_list=list_add(maria_open_list,&m_info->open_list);
+ }
+ else
+ {
+ /* We don't need to mark internal temporary tables as changed on disk */
+ share->internal_table= 1;
+ share->global_changed= 1;
+ }
DBUG_RETURN(m_info);
err:
@@ -243,7 +253,7 @@ MARIA_HA *maria_clone(MARIA_SHARE *share, int mode)
mysql_mutex_lock(&THR_LOCK_maria);
new_info= maria_clone_internal(share, NullS, mode,
share->data_file_type == BLOCK_RECORD ?
- share->bitmap.file.file : -1);
+ share->bitmap.file.file : -1, 0);
mysql_mutex_unlock(&THR_LOCK_maria);
return new_info;
}
@@ -263,6 +273,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
int kfile,open_mode,save_errno;
uint i,j,len,errpos,head_length,base_pos,keys, realpath_err,
key_parts,unique_key_parts,fulltext_keys,uniques;
+ uint internal_table= test(open_flags & HA_OPEN_INTERNAL_TABLE);
size_t info_length;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
@@ -293,10 +304,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
DBUG_RETURN(0);
}
- mysql_mutex_lock(&THR_LOCK_maria);
old_info= 0;
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
if ((open_flags & HA_OPEN_COPY) ||
- !(old_info=_ma_test_if_reopen(name_buff)))
+ (internal_table || !(old_info=_ma_test_if_reopen(name_buff))))
{
share= &share_buff;
bzero((uchar*) &share_buff,sizeof(share_buff));
@@ -592,7 +604,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
{
/* Packed key, ensure we don't get overflow in underflow() */
keyinfo->underflow_block_length=
- max((int) (share->max_index_block_size - keyinfo->maxlength * 3),
+ MY_MAX((int) (share->max_index_block_size - keyinfo->maxlength * 3),
(int) (share->keypage_header + share->base.key_reflength));
set_if_smaller(keyinfo->underflow_block_length,
keyinfo->block_length/3);
@@ -780,7 +792,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
/* Need some extra bytes for decode_bytes */
share->base.extra_rec_buff_size+= 7;
}
- share->base.default_rec_buff_size= max(share->base.pack_reclength +
+ share->base.default_rec_buff_size= MY_MAX(share->base.pack_reclength +
share->base.extra_rec_buff_size,
share->base.max_key_length);
@@ -981,14 +993,16 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
data_file= share->bitmap.file.file; /* Only opened once */
}
- if (!(m_info= maria_clone_internal(share, name, mode, data_file)))
+ if (!(m_info= maria_clone_internal(share, name, mode, data_file,
+ internal_table)))
goto err;
if (maria_is_crashed(m_info))
DBUG_PRINT("warning", ("table is crashed: changed: %u",
share->state.changed));
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
m_info->open_flags= open_flags;
DBUG_PRINT("exit", ("table: %p name: %s",m_info, name));
@@ -1027,7 +1041,8 @@ err:
default:
break;
}
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
my_errno= save_errno;
DBUG_RETURN (NULL);
} /* maria_open */
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
index c14e69414b5..9b06c0d4f78 100644
--- a/storage/maria/ma_packrec.c
+++ b/storage/maria/ma_packrec.c
@@ -718,7 +718,7 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
return OFFSET_TABLE_SIZE;
}
length2= find_longest_bitstream(next, end) + 1;
- length=max(length,length2);
+ length=MY_MAX(length,length2);
}
return length;
}
@@ -1447,7 +1447,7 @@ uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
info->filepos=filepos+head_length;
if (file > 0)
{
- info->offset=min(info->rec_len, ref_length - head_length);
+ info->offset=MY_MIN(info->rec_len, ref_length - head_length);
memcpy(*rec_buff_p, header + head_length, info->offset);
}
return 0;
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 9ac42f885b5..c896f730d3f 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -3679,7 +3679,7 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
cur_offset= LSN_OFFSET(addr);
local_remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) :
(((longlong)log_file_size) - cur_offset +
- max(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) +
+ MY_MAX(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) +
end_offset);
if (initial_remainder == (ulonglong)(-1))
initial_remainder= local_remainder;
diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c
index b3e2b0ceab8..496ace2a84f 100644
--- a/storage/maria/ma_rt_mbr.c
+++ b/storage/maria/ma_rt_mbr.c
@@ -329,8 +329,8 @@ int maria_rtree_d_mbr(const HA_KEYSEG *keyseg, const uchar *a,
bmin= korr_func(b); \
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
- amin= min(amin, bmin); \
- amax= max(amax, bmax); \
+ amin= MY_MIN(amin, bmin); \
+ amax= MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -342,8 +342,8 @@ int maria_rtree_d_mbr(const HA_KEYSEG *keyseg, const uchar *a,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin= min(amin, bmin); \
- amax= max(amax, bmax); \
+ amin= MY_MIN(amin, bmin); \
+ amax= MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -422,8 +422,8 @@ int maria_rtree_combine_rect(const HA_KEYSEG *keyseg, const uchar* a,
bmin= korr_func(b); \
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
- amin= max(amin, bmin); \
- amax= min(amax, bmax); \
+ amin= MY_MAX(amin, bmin); \
+ amax= MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -436,8 +436,8 @@ int maria_rtree_combine_rect(const HA_KEYSEG *keyseg, const uchar* a,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin= max(amin, bmin); \
- amax= min(amax, bmax); \
+ amin= MY_MAX(amin, bmin); \
+ amax= MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -513,7 +513,7 @@ double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_AREA_INC_GET(type, get_func, len)\
@@ -524,7 +524,7 @@ double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
@@ -612,7 +612,7 @@ safe_end:
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_PERIM_INC_GET(type, get_func, len)\
@@ -623,7 +623,7 @@ safe_end:
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index 4bc179c3008..72f9c7ceca3 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -133,7 +133,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
sort_keys= (uchar **) NULL; error= 1;
maxbuffer=1;
- memavl=max(sortbuff_size,MIN_SORT_MEMORY);
+ memavl=MY_MAX(sortbuff_size,MIN_SORT_MEMORY);
records= info->sort_info->max_records;
sort_length= info->key_length;
LINT_INIT(keys);
@@ -364,7 +364,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg)
bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek));
bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
- memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ memavl= MY_MAX(sort_param->sortbuff_size, MIN_SORT_MEMORY);
idx= (uint)sort_param->sort_info->max_records;
sort_length= sort_param->key_length;
maxbuffer= 1;
@@ -857,7 +857,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, buffpek->base,
(length= sort_length*count),buffpek->file_pos,MYF_RW))
@@ -877,7 +877,7 @@ static uint read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint idx;
uchar *buffp;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
buffp= buffpek->base;
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
index 945654a0bbe..0147d00d6e0 100644
--- a/storage/maria/ma_test1.c
+++ b/storage/maria/ma_test1.c
@@ -631,7 +631,7 @@ static void create_record(uchar *record,uint rownr)
uint tmp;
uchar *ptr;;
sprintf((char*) blob_record,"... row: %d", rownr);
- strappend((char*) blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ strappend((char*) blob_record,MY_MAX(MAX_REC_LENGTH-rownr,10),' ');
tmp=strlen((char*) blob_record);
int4store(pos,tmp);
ptr=blob_record;
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
index ea1978b4ee5..242e5e16333 100644
--- a/storage/maria/ma_test2.c
+++ b/storage/maria/ma_test2.c
@@ -698,7 +698,7 @@ int main(int argc, char *argv[])
goto err2;
}
- for (i=min(2,keys) ; i-- > 0 ;)
+ for (i=MY_MIN(2,keys) ; i-- > 0 ;)
{
if (maria_rsame(file,read_record2,(int) i)) goto err;
if (bcmp(read_record,read_record2,reclength) != 0)
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 5d440a40dc0..24a3f96f42e 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -933,7 +933,7 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0
&s_temp));
}
DBUG_RETURN(_ma_split_page(info, key, anc_page,
- min(org_anc_length,
+ MY_MIN(org_anc_length,
info->s->max_index_block_size),
key_pos, s_temp.changed_length, t_length,
key_buff, insert_last));
@@ -2075,7 +2075,7 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page,
Handle case when split happened directly after the newly inserted key.
*/
max_key_length= new_length - offset;
- extra_length= min(key_length, max_key_length);
+ extra_length= MY_MIN(key_length, max_key_length);
if (offset + move_length > new_length)
{
/* This is true when move_length includes changes for next packed key */
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index e983f561bbb..c1ac49a6b35 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -465,6 +465,7 @@ typedef struct st_maria_share
my_bool changed, /* If changed since lock */
global_changed, /* If changed since open */
not_flushed;
+ my_bool internal_table; /* Internal tmp table */
my_bool lock_key_trees; /* If we have to lock trees on read */
my_bool non_transactional_concurrent_insert;
my_bool delay_key_write;
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
index 40686995378..2fe5e818db9 100644
--- a/storage/maria/maria_pack.c
+++ b/storage/maria/maria_pack.c
@@ -1243,7 +1243,7 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
if (huff_counts->field_length > 2 &&
huff_counts->empty_fields + (records - huff_counts->empty_fields)*
- (1+max_bit(max(huff_counts->max_pre_space,
+ (1+max_bit(MY_MAX(huff_counts->max_pre_space,
huff_counts->max_end_space))) <
records * max_bit(huff_counts->field_length))
{
@@ -3021,7 +3021,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
if (mrg->src_file_has_indexes_disabled)
{
isam_file->s->state.state.key_file_length=
- max(isam_file->s->state.state.key_file_length, new_length);
+ MY_MAX(isam_file->s->state.state.key_file_length, new_length);
}
state.dellink= HA_OFFSET_ERROR;
state.version=(ulong) time((time_t*) 0);
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c
index f8959c977f8..38fdb358e53 100644
--- a/storage/maria/trnman.c
+++ b/storage/maria/trnman.c
@@ -877,7 +877,7 @@ TrID trnman_get_min_safe_trid()
{
TrID trid;
mysql_mutex_lock(&LOCK_trn_list);
- trid= min(active_list_min.next->min_read_from,
+ trid= MY_MIN(active_list_min.next->min_read_from,
global_trid_generator);
mysql_mutex_unlock(&LOCK_trn_list);
return trid;
diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t
index e66d269ab93..18b26a7bd45 100755
--- a/storage/maria/unittest/ma_test_all-t
+++ b/storage/maria/unittest/ma_test_all-t
@@ -650,6 +650,8 @@ sub ok
{
exit 1;
}
+ # Unlink all files so that we can continue on error
+ unlink_all_possible_tmp_files();
return 0;
}
@@ -702,7 +704,7 @@ sub unlink_all_possible_tmp_files()
unlink_log_files();
# Unlink tmp files that may have been created when testing the test programs
- unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? $full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA?>;
+ unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? $full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA? *.TMD>;
}
####
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index f649de8bd5c..3e73bb7c801 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -247,8 +247,8 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
pos->algorithm;
keydef[i].block_length= pos->block_size;
keydef[i].seg= keyseg;
- keydef[i].keysegs= pos->key_parts;
- for (j= 0; j < pos->key_parts; j++)
+ keydef[i].keysegs= pos->user_defined_key_parts;
+ for (j= 0; j < pos->user_defined_key_parts; j++)
{
Field *field= pos->key_part[j].field;
type= field->key_type();
@@ -310,7 +310,7 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
(uchar*) table_arg->record[0]);
}
}
- keyseg+= pos->key_parts;
+ keyseg+= pos->user_defined_key_parts;
}
if (table_arg->found_next_number_field)
keydef[share->next_number_index].flag|= HA_AUTO_KEY;
@@ -1136,8 +1136,8 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
}
if (error && file->create_unique_index_by_sort &&
share->state.dupp_key != MAX_KEY)
- print_keydup_error(share->state.dupp_key,
- ER(ER_DUP_ENTRY_WITH_KEY_NAME), MYF(0));
+ print_keydup_error(table, &table->key_info[share->state.dupp_key],
+ MYF(0));
}
else
{
@@ -1527,8 +1527,8 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint flags)
{
DBUG_ENTER("ha_myisam::start_bulk_insert");
THD *thd= current_thd;
- ulong size= min(thd->variables.read_buff_size,
- (ulong) (table->s->avg_row_length*rows));
+ ulong size= MY_MIN(thd->variables.read_buff_size,
+ (ulong) (table->s->avg_row_length*rows));
DBUG_PRINT("info",("start_bulk_insert: rows %lu size %lu",
(ulong) rows, size));
@@ -1539,36 +1539,33 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint flags)
can_enable_indexes= mi_is_all_keys_active(file->s->state.key_map,
file->s->base.keys);
- if (!(specialflag & SPECIAL_SAFE_MODE))
+ /*
+ Only disable old index if the table was empty and we are inserting
+ a lot of rows.
+ Note that in end_bulk_insert() we may truncate the table if
+ enable_indexes() failed, thus it's essential that indexes are
+ disabled ONLY for an empty table.
+ */
+ if (file->state->records == 0 && can_enable_indexes &&
+ (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
{
- /*
- Only disable old index if the table was empty and we are inserting
- a lot of rows.
- Note that in end_bulk_insert() we may truncate the table if
- enable_indexes() failed, thus it's essential that indexes are
- disabled ONLY for an empty table.
- */
- if (file->state->records == 0 && can_enable_indexes &&
- (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
+ if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
{
- if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
- {
- file->update|= HA_STATE_CHANGED;
- mi_clear_all_keys_active(file->s->state.key_map);
- }
- else
- {
- my_bool all_keys= test(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
- mi_disable_indexes_for_rebuild(file, rows, all_keys);
- }
+ file->update|= HA_STATE_CHANGED;
+ mi_clear_all_keys_active(file->s->state.key_map);
}
else
+ {
+ my_bool all_keys= test(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
+ mi_disable_indexes_for_rebuild(file, rows, all_keys);
+ }
+ }
+ else
if (!file->bulk_insert &&
(!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT))
{
mi_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
}
- }
DBUG_VOID_RETURN;
}
@@ -1846,7 +1843,7 @@ int ha_myisam::info(uint flag)
number of records in the buffer results in a different number of buffer
refills and in a different order of records in the result set.
*/
- stats.mrr_length_per_rec= misam_info.reflength + 8; // 8=max(sizeof(void *))
+ stats.mrr_length_per_rec= misam_info.reflength + 8; // 8=MY_MAX(sizeof(void *))
ref_length= misam_info.reflength;
share->db_options_in_use= misam_info.options;
@@ -1896,8 +1893,6 @@ int ha_myisam::info(uint flag)
int ha_myisam::extra(enum ha_extra_function operation)
{
- if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
- return 0;
if (operation == HA_EXTRA_MMAP && !opt_myisam_use_mmap)
return 0;
return mi_extra(file, operation, 0);
@@ -1915,8 +1910,6 @@ int ha_myisam::reset(void)
int ha_myisam::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
- if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
- return 0;
return mi_extra(file, operation, (void*) &cache_size);
}
diff --git a/storage/myisam/mi_cache.c b/storage/myisam/mi_cache.c
index 6e9feaefb2d..3477e67eae5 100644
--- a/storage/myisam/mi_cache.c
+++ b/storage/myisam/mi_cache.c
@@ -62,7 +62,7 @@ int _mi_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, uint length,
(my_off_t) (info->read_end - info->request_pos))
{
in_buff_pos=info->request_pos+(uint) offset;
- in_buff_length= min(length, (size_t) (info->read_end-in_buff_pos));
+ in_buff_length= MY_MIN(length, (size_t) (info->read_end-in_buff_pos));
memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
if (!(length-=in_buff_length))
DBUG_RETURN(0);
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 056aff5a72b..61dbbb7a18d 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -1946,7 +1946,13 @@ int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name)
key++,keyinfo++)
{
if (! mi_is_key_active(info->s->state.key_map, key))
+ {
+ /* Since the key is not active, this should not be read, but we
initialize it anyway to silence a Valgrind warning when passing that
+ chunk of memory to pwrite(). */
+ index_pos[key]= HA_OFFSET_ERROR;
continue;
+ }
if (share->state.key_root[key] != HA_OFFSET_ERROR)
{
@@ -2145,7 +2151,7 @@ int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
ulong buff_length;
DBUG_ENTER("filecopy");
- buff_length=(ulong) min(param->write_buffer_length,length);
+ buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
if (!(buff=my_malloc(buff_length,MYF(0))))
{
buff=tmp_buff; buff_length=IO_SIZE;
@@ -2303,7 +2309,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
MYF(param->malloc_flags));
if (share->data_file_type == DYNAMIC_RECORD)
- length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ length=MY_MAX(share->base.min_pack_length+1,share->base.min_block_length);
else if (share->data_file_type == COMPRESSED_RECORD)
length=share->base.min_block_length;
else
@@ -2392,7 +2398,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
(see _create_index_by_sort)
*/
sort_info.max_records= 10 *
- max(param->sort_buffer_length, MIN_SORT_BUFFER) /
+ MY_MAX(param->sort_buffer_length, MIN_SORT_BUFFER) /
sort_param.key_length;
}
@@ -2759,7 +2765,7 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
mysql_file_seek(param->read_cache.file, 0L, MY_SEEK_END, MYF(0));
if (share->data_file_type == DYNAMIC_RECORD)
- rec_length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ rec_length=MY_MAX(share->base.min_pack_length+1,share->base.min_block_length);
else if (share->data_file_type == COMPRESSED_RECORD)
rec_length=share->base.min_block_length;
else
@@ -3984,7 +3990,7 @@ word_init_ft_buf:
ft_buf->buf=ft_buf->lastkey+a_len;
/*
32 is just a safety margin here
- (at least max(val_len, sizeof(nod_flag)) should be there).
+ (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
May be better performance could be achieved if we'd put
(sort_info->keyinfo->block_length-32)/XXX
instead.
diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c
index e58c2e0f189..f0a82bcef04 100644
--- a/storage/myisam/mi_close.c
+++ b/storage/myisam/mi_close.c
@@ -31,7 +31,8 @@ int mi_close(register MI_INFO *info)
(long) info, (uint) share->reopen,
(uint) share->tot_locks));
- mysql_mutex_lock(&THR_LOCK_myisam);
+ if (info->open_list.data)
+ mysql_mutex_lock(&THR_LOCK_myisam);
if (info->lock_type == F_EXTRA_LCK)
info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
@@ -54,7 +55,8 @@ int mi_close(register MI_INFO *info)
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
}
flag= !--share->reopen;
- myisam_open_list=list_delete(myisam_open_list,&info->open_list);
+ if (info->open_list.data)
+ myisam_open_list= list_delete(myisam_open_list, &info->open_list);
mysql_mutex_unlock(&share->intern_lock);
my_free(mi_get_rec_buff_ptr(info, info->rec_buff));
@@ -111,7 +113,8 @@ int mi_close(register MI_INFO *info)
}
my_free(info->s);
}
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (info->open_list.data)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
if (info->ftparser_param)
{
my_free(info->ftparser_param);
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index ad97fba2cbb..cc0cfd0ae3a 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -43,6 +43,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
base_pos,long_varchar_count,varchar_length,
max_key_block_length,unique_key_parts,fulltext_keys,offset;
uint aligned_key_start, block_length, res;
+ uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr;
ulong pack_reclength;
@@ -446,8 +447,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
block_length= (keydef->block_length ?
my_round_up_to_next_power(keydef->block_length) :
myisam_block_size);
- block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH);
- block_length= min(block_length, MI_MAX_KEY_BLOCK_LENGTH);
+ block_length= MY_MAX(block_length, MI_MIN_KEY_BLOCK_LENGTH);
+ block_length= MY_MIN(block_length, MI_MAX_KEY_BLOCK_LENGTH);
keydef->block_length= (uint16) MI_BLOCK_SIZE(length-real_length_diff,
pointer,MI_MAX_KEYPTR_SIZE,
@@ -536,7 +537,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
got from MYI file header (see also myisampack.c:save_state)
*/
share.base.key_reflength=
- mi_get_pointer_length(max(ci->key_file_length,tmp),3);
+ mi_get_pointer_length(MY_MAX(ci->key_file_length,tmp),3);
share.base.keys= share.state.header.keys= keys;
share.state.header.uniques= uniques;
share.state.header.fulltext_keys= fulltext_keys;
@@ -569,12 +570,13 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.base.min_block_length=
(share.base.pack_reclength+3 < MI_EXTEND_BLOCK_LENGTH &&
! share.base.blobs) ?
- max(share.base.pack_reclength,MI_MIN_BLOCK_LENGTH) :
+ MY_MAX(share.base.pack_reclength,MI_MIN_BLOCK_LENGTH) :
MI_EXTEND_BLOCK_LENGTH;
if (! (flags & HA_DONT_TOUCH_DATA))
share.state.create_time= time((time_t*) 0);
- mysql_mutex_lock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_myisam);
/*
NOTE: For test_if_reopen() we need a real path name. Hence we need
@@ -631,7 +633,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
NOTE: The filename is compared against unique_file_name of every
open table. Hence we need a real path here.
*/
- if (test_if_reopen(filename))
+ if (!internal_table && test_if_reopen(filename))
{
my_printf_error(HA_ERR_TABLE_EXIST, "MyISAM table '%s' is in use "
"(most likely by a MERGE table). Try FLUSH TABLES.",
@@ -820,7 +822,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
goto err;
}
errpos=0;
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
res= 0;
if (mysql_file_close(file, MYF(0)))
res= my_errno;
@@ -828,7 +831,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
DBUG_RETURN(res);
err:
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
err_no_lock:
save_errno=my_errno;
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index 009a2affe0c..021809ed892 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -118,7 +118,8 @@ int mi_munmap_file(MI_INFO *info)
{
int ret;
DBUG_ENTER("mi_unmap_file");
- if ((ret= my_munmap(info->s->file_map, (size_t) info->s->mmaped_length)))
+ if ((ret= my_munmap((void*) info->s->file_map,
+ (size_t) info->s->mmaped_length)))
DBUG_RETURN(ret);
info->s->file_read= mi_nommap_pread;
info->s->file_write= mi_nommap_pwrite;
@@ -865,7 +866,7 @@ static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
uint tmp=MY_ALIGN(reclength - length + 3 +
test(reclength >= 65520L),MI_DYN_ALIGN_SIZE);
/* Don't create a block bigger than MI_MAX_BLOCK_LENGTH */
- tmp= min(length+tmp, MI_MAX_BLOCK_LENGTH)-length;
+ tmp= MY_MIN(length+tmp, MI_MAX_BLOCK_LENGTH)-length;
/* Check if we can extend this block */
if (block_info.filepos + block_info.block_len ==
info->state->data_file_length &&
@@ -1780,15 +1781,21 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, uchar *buf,
if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
{
- if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
- && skip_deleted_blocks)
- {
- filepos=block_info.filepos+block_info.block_len;
- block_info.second_read=0;
- continue; /* Search after next_record */
- }
- if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)))
{
+ if (skip_deleted_blocks)
+ {
+ filepos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue; /* Search after next_record */
+ }
+ /*
+ If we're not on the first block of a record and
+ the block is marked as deleted or out of sync,
+ something's gone wrong: the record is damaged.
+ */
+ if (block_of_record != 0)
+ goto panic;
my_errno=HA_ERR_RECORD_DELETED;
info->lastpos=block_info.filepos;
info->nextpos=block_info.filepos+block_info.block_len;
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index dab1f66ed6d..f57fba5c2c5 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -100,7 +100,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
cache_size= (extra_arg ? *(ulong*) extra_arg :
my_default_record_cache_size);
if (!(init_io_cache(&info->rec_cache,info->dfile,
- (uint) min(info->state->data_file_length+1,
+ (uint) MY_MIN(info->state->data_file_length+1,
cache_size),
READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
MYF(share->write_flag & MY_WAIT_IF_FULL))))
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 438057e22df..f8213b1a3a5 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -14,7 +14,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-/* open a isam-database */
+/*
+ open an ISAM database
+
+ Internal temporary tables
+ -------------------------
+ Since only single instance of internal temporary table is required by
+ optimizer, such tables are not registered on myisam_open_list. In effect
+ it means (a) THR_LOCK_myisam is not held while such table is being created,
+ opened or closed; (b) no iteration through myisam_open_list while opening a
+ table. This optimization gives nice scalability benefit in concurrent
+ environment. MEMORY internal temporary tables are optimized similarly.
+*/
#include "fulltext.h"
#include "sp_defs.h"
@@ -74,10 +85,11 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
int lock_error,kfile,open_mode,save_errno,have_rtree=0, realpath_err;
uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys,
key_parts,unique_key_parts,base_key_parts,fulltext_keys,uniques;
+ uint internal_table= open_flags & HA_OPEN_INTERNAL_TABLE;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
- MI_INFO info,*UNINIT_VAR(m_info),*old_info;
+ MI_INFO info,*UNINIT_VAR(m_info),*old_info= NULL;
MYISAM_SHARE share_buff,*share;
ulong *rec_per_key_part= 0;
my_off_t *key_root, *key_del;
@@ -99,8 +111,13 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
DBUG_RETURN (NULL);
}
- mysql_mutex_lock(&THR_LOCK_myisam);
- if (!(old_info=test_if_reopen(name_buff)))
+ if (!internal_table)
+ {
+ mysql_mutex_lock(&THR_LOCK_myisam);
+ old_info= test_if_reopen(name_buff);
+ }
+
+ if (!old_info)
{
share= &share_buff;
bzero((uchar*) &share_buff,sizeof(share_buff));
@@ -311,7 +328,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
strmov(share->index_file_name, index_name);
strmov(share->data_file_name, data_name);
- share->blocksize=min(IO_SIZE,myisam_block_size);
+ share->blocksize=MY_MIN(IO_SIZE,myisam_block_size);
{
HA_KEYSEG *pos=share->keyparts;
uint32 ftkey_nr= 1;
@@ -349,6 +366,12 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
else if (pos->type == HA_KEYTYPE_BINARY)
pos->charset= &my_charset_bin;
+ if (!(share->keyinfo[i].flag & HA_SPATIAL) &&
+ pos->start > share->base.reclength)
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
}
if (share->keyinfo[i].flag & HA_SPATIAL)
{
@@ -491,7 +514,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share->base.margin_key_file_length=(share->base.max_key_file_length -
(keys ? MI_INDEX_BLOCK_MARGIN *
share->blocksize * keys : 0));
- share->blocksize=min(IO_SIZE,myisam_block_size);
+ share->blocksize=MY_MIN(IO_SIZE,myisam_block_size);
share->data_file_type=STATIC_RECORD;
if (share->options & HA_OPTION_COMPRESS_RECORD)
{
@@ -638,10 +661,13 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
*m_info=info;
thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
- m_info->open_list.data=(void*) m_info;
- myisam_open_list=list_add(myisam_open_list,&m_info->open_list);
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ {
+ m_info->open_list.data= (void*) m_info;
+ myisam_open_list= list_add(myisam_open_list, &m_info->open_list);
+ mysql_mutex_unlock(&THR_LOCK_myisam);
+ }
bzero(info.buff, share->base.max_key_block_length * 2);
my_free(rec_per_key_part);
@@ -686,7 +712,8 @@ err:
default:
break;
}
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
my_errno=save_errno;
DBUG_RETURN (NULL);
} /* mi_open */
@@ -706,10 +733,10 @@ uchar *mi_alloc_rec_buff(MI_INFO *info, ulong length, uchar **buf)
if (length == (ulong) -1)
{
if (info->s->options & HA_OPTION_COMPRESS_RECORD)
- length= max(info->s->base.pack_reclength, info->s->max_pack_length);
+ length= MY_MAX(info->s->base.pack_reclength, info->s->max_pack_length);
else
length= info->s->base.pack_reclength;
- length= max(length, info->s->base.max_key_length);
+ length= MY_MAX(length, info->s->base.max_key_length);
/* Avoid unnecessary realloc */
if (newptr && length == old_length)
return newptr;
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index 7e2403b64c9..c95afe57725 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -685,7 +685,7 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
return OFFSET_TABLE_SIZE;
}
length2= find_longest_bitstream(next, end) + 1;
- length=max(length,length2);
+ length=MY_MAX(length,length2);
}
return length;
}
@@ -1399,7 +1399,7 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
info->filepos=filepos+head_length;
if (file > 0)
{
- info->offset=min(info->rec_len, ref_length - head_length);
+ info->offset=MY_MIN(info->rec_len, ref_length - head_length);
memcpy(*rec_buff_p, header + head_length, info->offset);
}
return 0;
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 968cb9624a6..01fa10de7a3 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -949,9 +949,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
("Found too long binary packed key: %u of %u at 0x%lx",
length, keyinfo->maxlength, (long) *page_pos));
DBUG_DUMP("key", *page_pos, 16);
- mi_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- DBUG_RETURN(0); /* Wrong key */
+ goto crashed; /* Wrong key */
}
/* Key is packed against prev key, take prefix from prev key. */
from= key;
@@ -994,6 +992,8 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (from == from_end) { from=page; from_end=page_end; }
length+= (uint) ((*key++ = *from++));
}
+ if (length > keyseg->length)
+ goto crashed;
}
else
length=keyseg->length;
@@ -1033,15 +1033,18 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (from_end != page_end)
{
DBUG_PRINT("error",("Error when unpacking key"));
- mi_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- DBUG_RETURN(0); /* Error */
+ goto crashed; /* Error */
}
/* Copy data pointer and, if appropriate, key block pointer. */
memcpy((uchar*) key,(uchar*) from,(size_t) length);
*page_pos= from+length;
}
DBUG_RETURN((uint) (key-start_key)+keyseg->length);
+
+ crashed:
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ DBUG_RETURN(0);
}
diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c
index 3b2597eb01e..9e4e1c46891 100644
--- a/storage/myisam/mi_test1.c
+++ b/storage/myisam/mi_test1.c
@@ -439,7 +439,7 @@ static void create_record(uchar *record,uint rownr)
uint tmp;
uchar *ptr;;
sprintf((char*) blob_record,"... row: %d", rownr);
- strappend((char*) blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ strappend((char*) blob_record,MY_MAX(MAX_REC_LENGTH-rownr,10),' ');
tmp=strlen((char*) blob_record);
int4store(pos,tmp);
ptr=blob_record;
diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c
index 3ec12ef5cca..e53c68874b2 100644
--- a/storage/myisam/mi_test2.c
+++ b/storage/myisam/mi_test2.c
@@ -597,7 +597,7 @@ int main(int argc, char *argv[])
goto err;
bmove(read_record2,read_record,reclength);
- for (i=min(2,keys) ; i-- > 0 ;)
+ for (i=MY_MIN(2,keys) ; i-- > 0 ;)
{
if (mi_rsame(file,read_record2,(int) i)) goto err;
if (memcmp(read_record,read_record2,reclength) != 0)
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index c8546ee56f5..64ffffc3a1e 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -16,6 +16,7 @@
/* Describe, check and repair of MyISAM tables */
#include "fulltext.h"
+#include "my_default.h"
#include <m_ctype.h>
#include <stdarg.h>
#include <my_getopt.h>
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 1624213851b..86e1978edaa 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
log_filename=myisam_log_filename;
get_options(&argc,&argv);
/* Number of MyISAM files we can have open at one time */
- max_files= (my_set_max_open_files(min(max_files,8))-6)/2;
+ max_files= (my_set_max_open_files(MY_MIN(max_files,8))-6)/2;
if (update)
printf("Trying to %s MyISAM files according to log '%s'\n",
(recover ? "recover" : "update"),log_filename);
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index 6ce88db87f5..c52bef1e40e 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -20,6 +20,7 @@
#endif
#include "myisamdef.h"
+#include "my_default.h"
#include <queues.h>
#include <my_tree.h>
#include "mysys_err.h"
@@ -783,7 +784,7 @@ static int create_dest_frm(char *source_table, char *dest_table)
*/
(void) my_copy(source_name, dest_name, MYF(MY_DONT_OVERWRITE_FILE));
- return 0;
+ DBUG_RETURN(0);
}
@@ -1269,7 +1270,7 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
if (huff_counts->field_length > 2 &&
huff_counts->empty_fields + (records - huff_counts->empty_fields)*
- (1+max_bit(max(huff_counts->max_pre_space,
+ (1+max_bit(MY_MAX(huff_counts->max_pre_space,
huff_counts->max_end_space))) <
records * max_bit(huff_counts->field_length))
{
@@ -3022,7 +3023,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
if (mrg->src_file_has_indexes_disabled)
{
isam_file->s->state.state.key_file_length=
- max(isam_file->s->state.state.key_file_length, new_length);
+ MY_MAX(isam_file->s->state.state.key_file_length, new_length);
}
state.dellink= HA_OFFSET_ERROR;
state.version=(ulong) time((time_t*) 0);
diff --git a/storage/myisam/rt_mbr.c b/storage/myisam/rt_mbr.c
index deca23bbec7..90569f4a5fc 100644
--- a/storage/myisam/rt_mbr.c
+++ b/storage/myisam/rt_mbr.c
@@ -325,8 +325,8 @@ int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res)
bmin = korr_func(b); \
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
- amin = min(amin, bmin); \
- amax = max(amax, bmax); \
+ amin = MY_MIN(amin, bmin); \
+ amax = MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -338,8 +338,8 @@ int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res)
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin = min(amin, bmin); \
- amax = max(amax, bmax); \
+ amin = MY_MIN(amin, bmin); \
+ amax = MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -417,8 +417,8 @@ int rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c,
bmin = korr_func(b); \
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
- amin = max(amin, bmin); \
- amax = min(amax, bmax); \
+ amin = MY_MAX(amin, bmin); \
+ amax = MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -431,8 +431,8 @@ int rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin = max(amin, bmin); \
- amax = min(amax, bmax); \
+ amin = MY_MAX(amin, bmin); \
+ amax = MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -508,7 +508,7 @@ double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_AREA_INC_GET(type, get_func, len)\
@@ -519,7 +519,7 @@ double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
@@ -604,7 +604,7 @@ safe_end:
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_PERIM_INC_GET(type, get_func, len)\
@@ -615,7 +615,7 @@ safe_end:
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 4af45ea02e9..6a328f9ef4e 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -130,7 +130,7 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
sort_keys= (uchar **) NULL; error= 1;
maxbuffer=1;
- memavl= max(sortbuff_size, MIN_SORT_BUFFER);
+ memavl= MY_MAX(sortbuff_size, MIN_SORT_BUFFER);
records= info->sort_info->max_records;
sort_length= info->key_length;
LINT_INIT(keys);
@@ -351,7 +351,7 @@ pthread_handler_t thr_find_all_keys(void *arg)
bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
sort_keys= (uchar **) NULL;
- memavl= max(sort_param->sortbuff_size, MIN_SORT_BUFFER);
+ memavl= MY_MAX(sort_param->sortbuff_size, MIN_SORT_BUFFER);
idx= (uint)sort_param->sort_info->max_records;
sort_length= sort_param->key_length;
maxbuffer= 1;
@@ -824,7 +824,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
(length= sort_length*count),
@@ -846,7 +846,7 @@ static uint read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint idx;
uchar *buffp;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
buffp = buffpek->base;
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 7bb7990d9a3..0971e9297d5 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -1304,7 +1304,7 @@ int ha_myisammrg::info(uint flag)
memcpy((char*) table->key_info[0].rec_per_key,
(char*) mrg_info.rec_per_key,
sizeof(table->key_info[0].rec_per_key[0]) *
- min(file->keys, table->s->key_parts));
+ MY_MIN(file->keys, table->s->key_parts));
}
}
if (flag & HA_STATUS_ERRKEY)
diff --git a/storage/perfschema/CMakeLists.txt b/storage/perfschema/CMakeLists.txt
index 0c9713d45d4..ef644030317 100644
--- a/storage/perfschema/CMakeLists.txt
+++ b/storage/perfschema/CMakeLists.txt
@@ -118,6 +118,10 @@ table_tiws_by_index_usage.h
table_tiws_by_table.h
table_tlws_by_table.h
table_users.h
+cursor_by_thread_connect_attr.h
+table_session_connect.h
+table_session_connect_attrs.h
+table_session_account_connect_attrs.h
cursor_by_account.cc
cursor_by_host.cc
cursor_by_thread.cc
@@ -126,6 +130,7 @@ ha_perfschema.cc
pfs.cc
pfs_account.cc
pfs_atomic.cc
+pfs_autosize.cc
pfs_check.cc
pfs_column_values.cc
pfs_con_slice.cc
@@ -189,6 +194,10 @@ table_tiws_by_index_usage.cc
table_tiws_by_table.cc
table_tlws_by_table.cc
table_users.cc
+cursor_by_thread_connect_attr.cc
+table_session_connect.cc
+table_session_connect_attrs.cc
+table_session_account_connect_attrs.cc
)
MYSQL_ADD_PLUGIN(perfschema ${PERFSCHEMA_SOURCES} STORAGE_ENGINE DEFAULT STATIC_ONLY)
diff --git a/storage/perfschema/cursor_by_thread_connect_attr.cc b/storage/perfschema/cursor_by_thread_connect_attr.cc
new file mode 100644
index 00000000000..7a0dd04119d
--- /dev/null
+++ b/storage/perfschema/cursor_by_thread_connect_attr.cc
@@ -0,0 +1,71 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "my_global.h"
+#include "cursor_by_thread_connect_attr.h"
+
+cursor_by_thread_connect_attr::cursor_by_thread_connect_attr(
+ const PFS_engine_table_share *share) :
+ PFS_engine_table(share, &m_pos), m_row_exists(false)
+{}
+
+int cursor_by_thread_connect_attr::rnd_next(void)
+{
+ PFS_thread *thread;
+
+ for (m_pos.set_at(&m_next_pos);
+ m_pos.has_more_thread();
+ m_pos.next_thread())
+ {
+ thread= &thread_array[m_pos.m_index_1];
+
+ if (thread->m_lock.is_populated())
+ {
+ make_row(thread, m_pos.m_index_2);
+ if (m_row_exists)
+ {
+ m_next_pos.set_after(&m_pos);
+ return 0;
+ }
+ }
+ }
+ return HA_ERR_END_OF_FILE;
+}
+
+
+int cursor_by_thread_connect_attr::rnd_pos(const void *pos)
+{
+ PFS_thread *thread;
+
+ set_position(pos);
+ DBUG_ASSERT(m_pos.m_index_1 < thread_max);
+
+ thread= &thread_array[m_pos.m_index_1];
+ if (!thread->m_lock.is_populated())
+ return HA_ERR_RECORD_DELETED;
+
+ make_row(thread, m_pos.m_index_2);
+ if (m_row_exists)
+ return 0;
+
+ return HA_ERR_RECORD_DELETED;
+}
+
+
+void cursor_by_thread_connect_attr::reset_position(void)
+{
+ m_pos.reset();
+ m_next_pos.reset();
+}
diff --git a/storage/perfschema/cursor_by_thread_connect_attr.h b/storage/perfschema/cursor_by_thread_connect_attr.h
new file mode 100644
index 00000000000..fbce56f208d
--- /dev/null
+++ b/storage/perfschema/cursor_by_thread_connect_attr.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef CURSOR_BY_THREAD_CONNECT_ATTR_H
+#define CURSOR_BY_THREAD_CONNECT_ATTR_H
+
+#include "pfs_column_types.h"
+#include "pfs_engine_table.h"
+#include "pfs_instr.h"
+
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+struct pos_connect_attr_by_thread_by_attr
+: public PFS_double_index
+{
+ pos_connect_attr_by_thread_by_attr()
+ : PFS_double_index(0, 0)
+ {}
+
+ inline bool has_more_thread(void)
+ {
+ return (m_index_1 < thread_max);
+ }
+
+ inline void next_thread(void)
+ {
+ m_index_1++;
+ m_index_2= 0;
+ }
+
+ inline void reset(void)
+ {
+ m_index_1= 0;
+ m_index_2= 0;
+ }
+};
+
+/** Cursor CURSOR_BY_THREAD_CONNECT_ATTR. */
+class cursor_by_thread_connect_attr : public PFS_engine_table
+{
+public:
+ virtual int rnd_next();
+ virtual int rnd_pos(const void *pos);
+ virtual void reset_position(void);
+
+protected:
+ cursor_by_thread_connect_attr(const PFS_engine_table_share *share);
+
+public:
+ ~cursor_by_thread_connect_attr()
+ {}
+
+protected:
+ virtual void make_row(PFS_thread *thread, uint ordinal)= 0;
+ /** True if row exists */
+ bool m_row_exists;
+
+private:
+ /** Current position. */
+ pos_connect_attr_by_thread_by_attr m_pos;
+ /** Next position. */
+ pos_connect_attr_by_thread_by_attr m_next_pos;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/gen_pfs_lex_token.cc b/storage/perfschema/gen_pfs_lex_token.cc
index b7470061de1..7581255b284 100644
--- a/storage/perfschema/gen_pfs_lex_token.cc
+++ b/storage/perfschema/gen_pfs_lex_token.cc
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -243,7 +243,7 @@ void print_tokens()
int main(int argc,char **argv)
{
puts("/*");
- puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011, 2012"));
+ puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
puts("*/");
printf("/*\n");
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 773d822af2b..50bdb043566 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -166,6 +166,8 @@ static struct st_mysql_show_var pfs_status_vars[]=
(char*) &statement_class_lost, SHOW_LONG},
{"Performance_schema_digest_lost",
(char*) &digest_lost, SHOW_LONG},
+ {"Performance_schema_session_connect_attrs_lost",
+ (char*) &session_connect_attrs_lost, SHOW_LONG},
{NullS, NullS, SHOW_LONG}
};
@@ -256,12 +258,12 @@ int ha_perfschema::write_row(uchar *buf)
int result;
DBUG_ENTER("ha_perfschema::write_row");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- ha_statistic_increment(&SSV::ha_write_count);
DBUG_ASSERT(m_table_share);
-
+ ha_statistic_increment(&SSV::ha_write_count);
result= m_table_share->write_row(table, buf, table->field);
-
DBUG_RETURN(result);
}
@@ -279,7 +281,9 @@ void ha_perfschema::use_hidden_primary_key(void)
int ha_perfschema::update_row(const uchar *old_data, uchar *new_data)
{
DBUG_ENTER("ha_perfschema::update_row");
-
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_update_count);
int result= m_table->update_row(table, old_data, new_data, table->field);
@@ -289,6 +293,8 @@ int ha_perfschema::update_row(const uchar *old_data, uchar *new_data)
int ha_perfschema::delete_row(const uchar *buf)
{
DBUG_ENTER("ha_perfschema::delete_row");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_delete_count);
@@ -329,6 +335,8 @@ int ha_perfschema::rnd_end(void)
int ha_perfschema::rnd_next(uchar *buf)
{
DBUG_ENTER("ha_perfschema::rnd_next");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
@@ -355,6 +363,8 @@ void ha_perfschema::position(const uchar *record)
int ha_perfschema::rnd_pos(uchar *buf, uchar *pos)
{
DBUG_ENTER("ha_perfschema::rnd_pos");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_read_rnd_count);
@@ -380,6 +390,8 @@ int ha_perfschema::delete_all_rows(void)
int result;
DBUG_ENTER("ha_perfschema::delete_all_rows");
+ if (!pfs_initialized)
+ DBUG_RETURN(0);
DBUG_ASSERT(m_table_share);
if (m_table_share->m_delete_all_rows)
diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h
index dc465da3758..c2929046f3d 100644
--- a/storage/perfschema/ha_perfschema.h
+++ b/storage/perfschema/ha_perfschema.h
@@ -72,8 +72,7 @@ public:
records.
*/
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
- HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
- HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | HA_HAS_OWN_BINLOGGING);
+ HA_PRIMARY_KEY_REQUIRED_FOR_DELETE);
}
/**
diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
index d3de38d025c..33b21ee2817 100644
--- a/storage/perfschema/pfs.cc
+++ b/storage/perfschema/pfs.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -688,6 +688,7 @@ static inline int mysql_mutex_lock(...)
- socket io (MYSQL_SOCKET)
- table io
- table lock
+ - idle
The flow of data between aggregates tables varies for each instrumentation.
@@ -857,24 +858,35 @@ static inline int mysql_mutex_lock(...)
@subsection IMPL_WAIT_SOCKET Socket waits
@verbatim
- socket_locker(T, F)
+ socket_locker(T, S)
|
| [1]
|
- |-> pfs_socket(F) =====>> [A], [B], [C], [D], [E]
+ |-> pfs_socket(S) =====>> [A], [B], [C], [D], [E]
|
| [2]
|
- |-> pfs_socket_class(F.class) =====>> [C], [D]
+ |-> pfs_socket_class(S.class) =====>> [C], [D]
|
- |-> pfs_thread(T).event_name(F) =====>> [A]
+ |-> pfs_thread(T).event_name(S) =====>> [A]
|
- ...
+ | [3]
+ |
+ 3a |-> pfs_account(U, H).event_name(S) =====>> [F], [G], [H]
+ . |
+ . | [4-RESET]
+ . |
+ 3b .....+-> pfs_user(U).event_name(S) =====>> [G]
+ . |
+ 3c .....+-> pfs_host(H).event_name(S) =====>> [H]
@endverbatim
Implemented as:
- [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
- [2] @c close_socket_v1()
+ - [3] @c aggregate_thread_waits()
+ - [4] @c PFS_account::aggregate_waits()
+ - [5] @c PFS_host::aggregate_waits()
- [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
@c table_ews_by_thread_by_event_name::make_row()
- [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
@@ -885,37 +897,78 @@ static inline int mysql_mutex_lock(...)
@c table_socket_summary_by_event_name::make_row()
- [E] SOCKET_SUMMARY_BY_INSTANCE,
@c table_socket_summary_by_instance::make_row()
+ - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
+ @c table_ews_by_account_by_event_name::make_row()
+ - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
+ @c table_ews_by_user_by_event_name::make_row()
+ - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
+ @c table_ews_by_host_by_event_name::make_row()
@subsection IMPL_WAIT_TABLE Table waits
@verbatim
- table_locker(T, Tb)
+ table_locker(Thread Th, Table Tb, Event = io or lock)
|
| [1]
|
- |-> pfs_table(Tb) =====>> [B], [C], [D]
- |
- | [2]
- |
- |-> pfs_table_share(Tb.share) =====>> [C], [D]
- |
- |-> pfs_thread(T).event_name(Tb) =====>> [A]
- |
- ...
+1a |-> pfs_table(Tb) =====>> [A], [B], [C]
+ | |
+ | | [2]
+ | |
+ | |-> pfs_table_share(Tb.share) =====>> [B], [C]
+ | |
+ | | [3]
+ | |
+ | |-> global_table_io_stat =====>> [C]
+ | |
+ | |-> global_table_lock_stat =====>> [C]
+ |
+1b |-> pfs_thread(Th).event_name(E) =====>> [D], [E], [F], [G]
+ | |
+ | | [ 4-RESET]
+ | |
+ | |-> pfs_account(U, H).event_name(E) =====>> [E], [F], [G]
+ | . |
+ | . | [5-RESET]
+ | . |
+ | .....+-> pfs_user(U).event_name(E) =====>> [F]
+ | . |
+ | .....+-> pfs_host(H).event_name(E) =====>> [G]
+ |
+1c |-> pfs_thread(Th).waits_current(W) =====>> [H]
+ |
+1d |-> pfs_thread(Th).waits_history(W) =====>> [I]
+ |
+1e |-> waits_history_long(W) =====>> [J]
@endverbatim
Implemented as:
- [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
- [2] @c close_table_v1()
- - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
- @c table_ews_by_thread_by_event_name::make_row()
- - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
+ - [3] @c drop_table_share_v1()
+ - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
+ - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
@c table_events_waits_summary_by_instance::make_table_row()
+ - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
+ @c table_os_global_by_type::make_row()
- [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
@c table_ews_global_by_event_name::make_table_io_row(),
@c table_ews_global_by_event_name::make_table_lock_row()
- - [D] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
- @c table_os_global_by_type::make_row()
+ - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
+ @c table_ews_by_thread_by_event_name::make_row()
+ - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
+ @c table_ews_by_user_by_account_name::make_row()
+ - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
+ @c table_ews_by_user_by_event_name::make_row()
+ - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
+ @c table_ews_by_host_by_event_name::make_row()
+ - [H] EVENTS_WAITS_CURRENT,
+ @c table_events_waits_common::make_row()
+ - [I] EVENTS_WAITS_HISTORY,
+ @c table_events_waits_common::make_row()
+ - [J] EVENTS_WAITS_HISTORY_LONG,
+ @c table_events_waits_common::make_row()
@section IMPL_STAGE Implementation for stages aggregates
@@ -1594,7 +1647,6 @@ static void unbind_table_v1(PSI_table *table)
PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
if (likely(pfs != NULL))
{
- pfs->aggregate();
pfs->m_thread_owner= NULL;
}
}
@@ -1615,12 +1667,6 @@ rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
/* The table handle was already instrumented, reuse it for this thread. */
thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
- if (unlikely(thread == NULL))
- {
- destroy_table(pfs);
- return NULL;
- }
-
if (unlikely(! pfs->m_share->m_enabled))
{
destroy_table(pfs);
@@ -1660,8 +1706,6 @@ rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
return NULL;
PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
- if (unlikely(thread == NULL))
- return NULL;
PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
return reinterpret_cast<PSI_table *> (pfs_table);
@@ -1681,9 +1725,18 @@ static void close_table_v1(PSI_table *table)
}
static PSI_socket*
-init_socket_v1(PSI_socket_key key, const my_socket *fd)
+init_socket_v1(PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len)
{
- INIT_BODY_V1(socket, key, fd);
+ PFS_socket_class *klass;
+ PFS_socket *pfs;
+ klass= find_socket_class(key);
+ if (unlikely(klass == NULL))
+ return NULL;
+ if (! klass->m_enabled)
+ return NULL;
+ pfs= create_socket(klass, fd, addr, addr_len);
+ return reinterpret_cast<PSI_socket *> (pfs);
}
static void destroy_socket_v1(PSI_socket *socket)
@@ -1731,7 +1784,7 @@ static void create_file_v1(PSI_file_key key, const char *name, File file)
}
uint len= strlen(name);
- PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len);
+ PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
file_handle_array[index]= pfs_file;
}
@@ -1835,13 +1888,13 @@ static int spawn_thread_v1(PSI_thread_key key,
@sa PSI_v1::new_thread.
*/
static PSI_thread*
-new_thread_v1(PSI_thread_key key, const void *identity, ulong thread_id)
+new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
{
PFS_thread *pfs;
PFS_thread_class *klass= find_thread_class(key);
if (likely(klass != NULL))
- pfs= create_thread(klass, identity, thread_id);
+ pfs= create_thread(klass, identity, processlist_id);
else
pfs= NULL;
@@ -1852,12 +1905,12 @@ new_thread_v1(PSI_thread_key key, const void *identity, ulong thread_id)
Implementation of the thread instrumentation interface.
@sa PSI_v1::set_thread_id.
*/
-static void set_thread_id_v1(PSI_thread *thread, unsigned long id)
+static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
{
PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
if (unlikely(pfs == NULL))
return;
- pfs->m_thread_id= id;
+ pfs->m_processlist_id= processlist_id;
}
/**
@@ -2045,10 +2098,10 @@ static void set_thread_state_v1(const char* state)
{
int state_len= state ? strlen(state) : 0;
- pfs->m_lock.allocated_to_dirty();
+ pfs->m_processlist_lock.allocated_to_dirty();
pfs->m_processlist_state_ptr= state;
pfs->m_processlist_state_length= state_len;
- pfs->m_lock.dirty_to_allocated();
+ pfs->m_processlist_lock.dirty_to_allocated();
}
}
@@ -2060,12 +2113,14 @@ static void set_thread_info_v1(const char* info, int info_len)
{
PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
+ DBUG_ASSERT((info != NULL) || (info_len == 0));
+
if (likely(pfs != NULL))
{
- pfs->m_lock.allocated_to_dirty();
+ pfs->m_processlist_lock.allocated_to_dirty();
pfs->m_processlist_info_ptr= info;
pfs->m_processlist_info_length= info_len;
- pfs->m_lock.dirty_to_allocated();
+ pfs->m_processlist_lock.dirty_to_allocated();
}
}
@@ -2196,7 +2251,7 @@ start_mutex_wait_v1(PSI_mutex_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_mutex->m_wait_stat.aggregate_counted();
+ pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2294,7 +2349,7 @@ start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_rwlock->m_wait_stat.aggregate_counted();
+ pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2401,7 +2456,7 @@ start_cond_wait_v1(PSI_cond_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_cond->m_wait_stat.aggregate_counted();
+ pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2478,8 +2533,6 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
return NULL;
PFS_thread *pfs_thread= pfs_table->m_thread_owner;
- if (unlikely(pfs_thread == NULL))
- return NULL;
DBUG_ASSERT(pfs_thread ==
my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
@@ -2489,6 +2542,8 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
if (flag_thread_instrumentation)
{
+ if (pfs_thread == NULL)
+ return NULL;
if (! pfs_thread->m_enabled)
return NULL;
state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
@@ -2538,7 +2593,6 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
pfs_thread->m_events_waits_current++;
}
- /* TODO: consider a shortcut here */
}
else
{
@@ -2585,11 +2639,6 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
return NULL;
PFS_thread *pfs_thread= pfs_table->m_thread_owner;
- if (unlikely(pfs_thread == NULL))
- return NULL;
-
- DBUG_ASSERT(pfs_thread ==
- my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
PFS_TL_LOCK_TYPE lock_type;
@@ -2619,6 +2668,8 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
if (flag_thread_instrumentation)
{
+ if (pfs_thread == NULL)
+ return NULL;
if (! pfs_thread->m_enabled)
return NULL;
state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
@@ -2668,7 +2719,6 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
pfs_thread->m_events_waits_current++;
}
- /* TODO: consider a shortcut here */
}
else
{
@@ -2729,11 +2779,6 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
if (klass->m_timed)
flags|= STATE_FLAG_TIMED;
- uint len= strlen(name);
- PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len);
- if (unlikely(pfs_file == NULL))
- return NULL;
-
if (flag_events_waits_current)
{
if (unlikely(pfs_thread->m_events_waits_current >=
@@ -2755,9 +2800,9 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
- wait->m_object_instance_addr= pfs_file;
- wait->m_weak_file= pfs_file;
- wait->m_weak_version= pfs_file->get_version();
+ wait->m_object_instance_addr= NULL;
+ wait->m_weak_file= NULL;
+ wait->m_weak_version= 0;
wait->m_event_id= pfs_thread->m_event_id++;
wait->m_end_event_id= 0;
wait->m_operation= file_operation_map[static_cast<int> (op)];
@@ -2767,7 +2812,9 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
}
state->m_flags= flags;
- state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ state->m_file= NULL;
+ state->m_name= name;
+ state->m_class= klass;
state->m_operation= op;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2788,6 +2835,7 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
if (unlikely(pfs_file == NULL))
return NULL;
DBUG_ASSERT(pfs_file->m_class != NULL);
+ PFS_file_class *klass= pfs_file->m_class;
if (! pfs_file->m_enabled)
return NULL;
@@ -2825,7 +2873,7 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
wait->m_nesting_event_type= parent_event->m_event_type;
wait->m_thread= pfs_thread;
- wait->m_class= pfs_file->m_class;
+ wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
wait->m_object_instance_addr= pfs_file;
@@ -2856,6 +2904,8 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
state->m_flags= flags;
state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
state->m_operation= op;
+ state->m_name= NULL;
+ state->m_class= klass;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2890,10 +2940,12 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
if (op == PSI_FILE_CLOSE)
file_handle_array[index]= NULL;
- DBUG_ASSERT(pfs_file->m_class != NULL);
if (! pfs_file->m_enabled)
return NULL;
+ DBUG_ASSERT(pfs_file->m_class != NULL);
+ PFS_file_class *klass= pfs_file->m_class;
+
register uint flags;
if (flag_thread_instrumentation)
@@ -2927,7 +2979,7 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
wait->m_nesting_event_type= parent_event->m_event_type;
wait->m_thread= pfs_thread;
- wait->m_class= pfs_file->m_class;
+ wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
wait->m_object_instance_addr= pfs_file;
@@ -2958,6 +3010,8 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
state->m_flags= flags;
state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
state->m_operation= op;
+ state->m_name= NULL;
+ state->m_class= klass;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2991,14 +3045,6 @@ start_socket_wait_v1(PSI_socket_locker_state *state,
if (unlikely(pfs_thread == NULL))
return NULL;
-#ifdef LATER
- /*
- Needs refinement, because of KILL.
- */
- DBUG_ASSERT(pfs_thread ==
- my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
-#endif
-
if (!pfs_thread->m_enabled)
return NULL;
@@ -3112,22 +3158,15 @@ static void unlock_mutex_v1(PSI_mutex *mutex)
PFS_mutex::m_lock_stat is not exposed in user visible tables
currently, so there is no point spending time computing it.
*/
- PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
- DBUG_ASSERT(pfs_thread != NULL);
-
- if (unlikely(! flag_events_waits_current))
- return;
- if (! pfs_mutex->m_class->m_enabled)
+ if (! pfs_mutex->m_enabled)
return;
- if (! pfs_thread->m_enabled)
+
+ if (! pfs_mutex->m_timed)
return;
- if (pfs_mutex->m_class->m_timed)
- {
- ulonglong locked_time;
- locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
- aggregate_single_stat_chain(&pfs_mutex->m_lock_stat, locked_time);
- }
+ ulonglong locked_time;
+ locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
+ pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
#endif
}
@@ -3185,32 +3224,23 @@ static void unlock_rwlock_v1(PSI_rwlock *rwlock)
#ifdef LATER_WL2333
/* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
- PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
- DBUG_ASSERT(pfs_thread != NULL);
- if (unlikely(! flag_events_waits_current))
- return;
- if (! pfs_rwlock->m_class->m_enabled)
+ if (! pfs_rwlock->m_enabled)
return;
- if (! pfs_thread->m_enabled)
+
+ if (! pfs_rwlock->m_timed)
return;
ulonglong locked_time;
if (last_writer)
{
- if (pfs_rwlock->m_class->m_timed)
- {
- locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
- aggregate_single_stat_chain(&pfs_rwlock->m_write_lock_stat, locked_time);
- }
+ locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
+ pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
}
else if (last_reader)
{
- if (pfs_rwlock->m_class->m_timed)
- {
- locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
- aggregate_single_stat_chain(&pfs_rwlock->m_read_lock_stat, locked_time);
- }
+ locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
+ pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
}
#else
(void) last_reader;
@@ -3352,17 +3382,16 @@ static void end_idle_wait_v1(PSI_idle_locker* locker)
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
PFS_single_stat *event_name_array;
event_name_array= thread->m_instr_class_waits_stats;
- uint index= global_idle_class.m_event_name_index;
if (flags & STATE_FLAG_TIMED)
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
- event_name_array[index].aggregate_value(wait_time);
+ event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
- event_name_array[index].aggregate_counted();
+ event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
}
if (flags & STATE_FLAG_EVENT)
@@ -3379,6 +3408,17 @@ static void end_idle_wait_v1(PSI_idle_locker* locker)
thread->m_events_waits_current--;
}
}
+
+ if (flags & STATE_FLAG_TIMED)
+ {
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
+ global_idle_stat.aggregate_value(wait_time);
+ }
+ else
+ {
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
+ global_idle_stat.aggregate_counted();
+ }
}
/**
@@ -3404,12 +3444,12 @@ static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- mutex->m_wait_stat.aggregate_value(wait_time);
+ mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- mutex->m_wait_stat.aggregate_counted();
+ mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
}
if (likely(rc == 0))
@@ -3471,12 +3511,12 @@ static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- rwlock->m_wait_stat.aggregate_value(wait_time);
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- rwlock->m_wait_stat.aggregate_counted();
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
}
if (rc == 0)
@@ -3551,12 +3591,12 @@ static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- rwlock->m_wait_stat.aggregate_value(wait_time);
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- rwlock->m_wait_stat.aggregate_counted();
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
}
if (likely(rc == 0))
@@ -3622,12 +3662,12 @@ static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- cond->m_wait_stat.aggregate_value(wait_time);
+ cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- cond->m_wait_stat.aggregate_counted();
+ cond->m_cond_stat.m_wait_stat.aggregate_counted();
}
if (state->m_flags & STATE_FLAG_THREAD)
@@ -3682,23 +3722,27 @@ static void end_table_io_wait_v1(PSI_table_locker* locker)
DBUG_ASSERT(table != NULL);
PFS_single_stat *stat;
+ PFS_table_io_stat *table_io_stat;
DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
- (state->m_index == MAX_KEY));
+ (state->m_index == MAX_INDEXES));
+
+ table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
+ table_io_stat->m_has_data= true;
switch (state->m_io_operation)
{
case PSI_TABLE_FETCH_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_fetch;
+ stat= & table_io_stat->m_fetch;
break;
case PSI_TABLE_WRITE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_insert;
+ stat= & table_io_stat->m_insert;
break;
case PSI_TABLE_UPDATE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_update;
+ stat= & table_io_stat->m_update;
break;
case PSI_TABLE_DELETE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_delete;
+ stat= & table_io_stat->m_delete;
break;
default:
DBUG_ASSERT(false);
@@ -3719,22 +3763,40 @@ static void end_table_io_wait_v1(PSI_table_locker* locker)
stat->aggregate_counted();
}
- if (flags & STATE_FLAG_EVENT)
+ if (flags & STATE_FLAG_THREAD)
{
- DBUG_ASSERT(flags & STATE_FLAG_THREAD);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
DBUG_ASSERT(thread != NULL);
- PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
- DBUG_ASSERT(wait != NULL);
+ PFS_single_stat *event_name_array;
+ event_name_array= thread->m_instr_class_waits_stats;
- wait->m_timer_end= timer_end;
- wait->m_end_event_id= thread->m_event_id;
- if (flag_events_waits_history)
- insert_events_waits_history(thread, wait);
- if (flag_events_waits_history_long)
- insert_events_waits_history_long(wait);
- thread->m_events_waits_current--;
+ /*
+ Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ (for wait/io/table/sql/handler)
+ */
+ if (flags & STATE_FLAG_TIMED)
+ {
+ event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
+ }
+ else
+ {
+ event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
+ }
+
+ if (flags & STATE_FLAG_EVENT)
+ {
+ PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
+ DBUG_ASSERT(wait != NULL);
+
+ wait->m_timer_end= timer_end;
+ wait->m_end_event_id= thread->m_event_id;
+ if (flag_events_waits_history)
+ insert_events_waits_history(thread, wait);
+ if (flag_events_waits_history_long)
+ insert_events_waits_history_long(wait);
+ thread->m_events_waits_current--;
+ }
}
table->m_has_io_stats= true;
@@ -3770,22 +3832,40 @@ static void end_table_lock_wait_v1(PSI_table_locker* locker)
stat->aggregate_counted();
}
- if (flags & STATE_FLAG_EVENT)
+ if (flags & STATE_FLAG_THREAD)
{
- DBUG_ASSERT(flags & STATE_FLAG_THREAD);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
DBUG_ASSERT(thread != NULL);
- PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
- DBUG_ASSERT(wait != NULL);
+ PFS_single_stat *event_name_array;
+ event_name_array= thread->m_instr_class_waits_stats;
- wait->m_timer_end= timer_end;
- wait->m_end_event_id= thread->m_event_id;
- if (flag_events_waits_history)
- insert_events_waits_history(thread, wait);
- if (flag_events_waits_history_long)
- insert_events_waits_history_long(wait);
- thread->m_events_waits_current--;
+ /*
+ Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ (for wait/lock/table/sql/handler)
+ */
+ if (flags & STATE_FLAG_TIMED)
+ {
+ event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
+ }
+ else
+ {
+ event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
+ }
+
+ if (flags & STATE_FLAG_EVENT)
+ {
+ PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
+ DBUG_ASSERT(wait != NULL);
+
+ wait->m_timer_end= timer_end;
+ wait->m_end_event_id= thread->m_event_id;
+ if (flag_events_waits_history)
+ insert_events_waits_history(thread, wait);
+ if (flag_events_waits_history_long)
+ insert_events_waits_history_long(wait);
+ thread->m_events_waits_current--;
+ }
}
table->m_has_lock_stats= true;
@@ -3803,25 +3883,50 @@ static void end_file_wait_v1(PSI_file_locker *locker,
Implementation of the file instrumentation interface.
@sa PSI_v1::start_file_open_wait.
*/
-static PSI_file* start_file_open_wait_v1(PSI_file_locker *locker,
- const char *src_file,
- uint src_line)
+static void start_file_open_wait_v1(PSI_file_locker *locker,
+ const char *src_file,
+ uint src_line)
{
- PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
- DBUG_ASSERT(state != NULL);
-
start_file_wait_v1(locker, 0, src_file, src_line);
- return state->m_file;
+ return;
}
/**
Implementation of the file instrumentation interface.
@sa PSI_v1::end_file_open_wait.
*/
-static void end_file_open_wait_v1(PSI_file_locker *locker)
+static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
+ void *result)
{
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ switch (state->m_operation)
+ {
+ case PSI_FILE_STAT:
+ break;
+ case PSI_FILE_STREAM_OPEN:
+ case PSI_FILE_CREATE:
+ if (result != NULL)
+ {
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ const char *name= state->m_name;
+ uint len= strlen(name);
+ PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ }
+ break;
+ case PSI_FILE_OPEN:
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+
end_file_wait_v1(locker, 0);
+
+ return state->m_file;
}
/**
@@ -3831,25 +3936,33 @@ static void end_file_open_wait_v1(PSI_file_locker *locker)
static void end_file_open_wait_and_bind_to_descriptor_v1
(PSI_file_locker *locker, File file)
{
+ PFS_file *pfs_file= NULL;
int index= (int) file;
PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
DBUG_ASSERT(state != NULL);
- end_file_wait_v1(locker, 0);
+ if (index >= 0)
+ {
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ const char *name= state->m_name;
+ uint len= strlen(name);
+ pfs_file= find_or_create_file(thread, klass, name, len, true);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ }
- PFS_file *pfs_file= reinterpret_cast<PFS_file*> (state->m_file);
- DBUG_ASSERT(pfs_file != NULL);
+ end_file_wait_v1(locker, 0);
if (likely(index >= 0))
{
if (likely(index < file_handle_max))
file_handle_array[index]= pfs_file;
else
+ {
+ if (pfs_file != NULL)
+ release_file(pfs_file);
file_handle_lost++;
- }
- else
- {
- release_file(pfs_file);
+ }
}
}
@@ -3896,7 +4009,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
DBUG_ASSERT(state != NULL);
PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
- DBUG_ASSERT(file != NULL);
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
ulonglong timer_end= 0;
@@ -3905,15 +4018,26 @@ static void end_file_wait_v1(PSI_file_locker *locker,
register uint flags= state->m_flags;
size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
+ PFS_file_stat *file_stat;
+
+ if (file != NULL)
+ {
+ file_stat= & file->m_file_stat;
+ }
+ else
+ {
+ file_stat= & klass->m_file_stat;
+ }
+
switch (state->m_operation)
{
/* Group read operations */
case PSI_FILE_READ:
- byte_stat= &file->m_file_stat.m_io_stat.m_read;
+ byte_stat= &file_stat->m_io_stat.m_read;
break;
/* Group write operations */
case PSI_FILE_WRITE:
- byte_stat= &file->m_file_stat.m_io_stat.m_write;
+ byte_stat= &file_stat->m_io_stat.m_write;
break;
/* Group remaining operations as miscellaneous */
case PSI_FILE_CREATE:
@@ -3931,7 +4055,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
case PSI_FILE_SYNC:
case PSI_FILE_STAT:
case PSI_FILE_CLOSE:
- byte_stat= &file->m_file_stat.m_io_stat.m_misc;
+ byte_stat= &file_stat->m_io_stat.m_misc;
break;
default:
DBUG_ASSERT(false);
@@ -3959,7 +4083,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
PFS_single_stat *event_name_array;
event_name_array= thread->m_instr_class_waits_stats;
- uint index= file->m_class->m_event_name_index;
+ uint index= klass->m_event_name_index;
if (flags & STATE_FLAG_TIMED)
{
@@ -3980,6 +4104,9 @@ static void end_file_wait_v1(PSI_file_locker *locker,
wait->m_timer_end= timer_end;
wait->m_number_of_bytes= bytes;
wait->m_end_event_id= thread->m_event_id;
+ wait->m_object_instance_addr= file;
+ wait->m_weak_file= file;
+ wait->m_weak_version= (file ? file->get_version() : 0);
if (flag_events_waits_history)
insert_events_waits_history(thread, wait);
@@ -3988,22 +4115,79 @@ static void end_file_wait_v1(PSI_file_locker *locker,
thread->m_events_waits_current--;
}
}
+}
- /* Release or destroy the file if necessary */
- switch(state->m_operation)
+/**
+ Implementation of the file instrumentation interface.
+ @sa PSI_v1::start_file_close_wait.
+*/
+static void start_file_close_wait_v1(PSI_file_locker *locker,
+ const char *src_file,
+ uint src_line)
+{
+ PFS_thread *thread;
+ const char *name;
+ uint len;
+ PFS_file *pfs_file;
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ switch (state->m_operation)
{
- case PSI_FILE_CLOSE:
- case PSI_FILE_STREAM_CLOSE:
- case PSI_FILE_STAT:
- release_file(file);
- break;
case PSI_FILE_DELETE:
- DBUG_ASSERT(thread != NULL);
- destroy_file(thread, file);
+ thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ name= state->m_name;
+ len= strlen(name);
+ pfs_file= find_or_create_file(thread, NULL, name, len, false);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ break;
+ case PSI_FILE_STREAM_CLOSE:
+ case PSI_FILE_CLOSE:
break;
default:
+ DBUG_ASSERT(false);
break;
}
+
+ start_file_wait_v1(locker, 0, src_file, src_line);
+
+ return;
+}
+
+/**
+ Implementation of the file instrumentation interface.
+ @sa PSI_v1::end_file_close_wait.
+*/
+static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
+{
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ end_file_wait_v1(locker, 0);
+
+ if (rc == 0)
+ {
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
+
+ /* Release or destroy the file if necessary */
+ switch(state->m_operation)
+ {
+ case PSI_FILE_CLOSE:
+ case PSI_FILE_STREAM_CLOSE:
+ if (file != NULL)
+ release_file(file);
+ break;
+ case PSI_FILE_DELETE:
+ if (file != NULL)
+ destroy_file(thread, file);
+ break;
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+ }
+ return;
}
static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
@@ -4165,7 +4349,8 @@ static void end_stage_v1()
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
- PSI_statement_key key)
+ PSI_statement_key key,
+ const void *charset)
{
DBUG_ASSERT(state != NULL);
if (! flag_global_instrumentation)
@@ -4262,9 +4447,11 @@ get_thread_statement_locker_v1(PSI_statement_locker_state *state,
if (flag_statements_digest)
{
+ const CHARSET_INFO *cs= static_cast <const CHARSET_INFO*> (charset);
flags|= STATE_FLAG_DIGEST;
state->m_digest_state.m_last_id_index= 0;
digest_reset(& state->m_digest_state.m_digest_storage);
+ state->m_digest_state.m_digest_storage.m_charset_number= cs->number;
}
state->m_discarded= false;
@@ -4288,6 +4475,8 @@ get_thread_statement_locker_v1(PSI_statement_locker_state *state,
state->m_no_index_used= 0;
state->m_no_good_index_used= 0;
+ state->m_schema_name_length= 0;
+
return reinterpret_cast<PSI_statement_locker*> (state);
}
@@ -4352,6 +4541,13 @@ static void start_statement_v1(PSI_statement_locker *locker,
state->m_timer_start= timer_start;
}
+ compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
+ DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
+
+ if (db_len > 0)
+ memcpy(state->m_schema_name, db, db_len);
+ state->m_schema_name_length= db_len;
+
if (flags & STATE_FLAG_EVENT)
{
PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
@@ -4563,11 +4759,10 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
if (flags & STATE_FLAG_DIGEST)
{
digest_storage= &state->m_digest_state.m_digest_storage;
-
- /*
- Populate PFS_statements_digest_stat with computed digest information.
- */
- digest_stat= find_or_create_digest(thread, digest_storage);
+ /* Populate PFS_statements_digest_stat with computed digest information.*/
+ digest_stat= find_or_create_digest(thread, digest_storage,
+ state->m_schema_name,
+ state->m_schema_name_length);
}
if (flags & STATE_FLAG_EVENT)
@@ -4633,11 +4828,10 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
{
/* Set digest stat. */
digest_storage= &state->m_digest_state.m_digest_storage;
-
- /*
- Populate PFS_statements_digest_stat with computed digest information.
- */
- digest_stat= find_or_create_digest(thread, digest_storage);
+ /* Populate statements_digest_stat with computed digest information. */
+ digest_stat= find_or_create_digest(thread, digest_storage,
+ state->m_schema_name,
+ state->m_schema_name_length);
}
}
@@ -4869,6 +5063,42 @@ static void set_socket_thread_owner_v1(PSI_socket *socket)
pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
}
+
+/**
+ Implementation of the thread attribute connection interface
+ @sa PSI_v1::set_thread_connect_attr.
+*/
+static int set_thread_connect_attrs_v1(const char *buffer, uint length,
+ const void *from_cs)
+{
+
+ PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
+
+ DBUG_ASSERT(buffer != NULL);
+
+ if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
+ {
+ /* copy from the input buffer as much as we can fit */
+ uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
+ length : session_connect_attrs_size_per_thread);
+ thd->m_lock.allocated_to_dirty();
+ memcpy(thd->m_session_connect_attrs, buffer, copy_size);
+ thd->m_session_connect_attrs_length= copy_size;
+ thd->m_session_connect_attrs_cs= (const CHARSET_INFO *) from_cs;
+ thd->m_lock.dirty_to_allocated();
+
+ if (copy_size == length)
+ return 0;
+ else
+ {
+ session_connect_attrs_lost++;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
/**
Implementation of the instrumentation interface.
@sa PSI_v1.
@@ -4939,6 +5169,8 @@ PSI_v1 PFS_v1=
end_file_open_wait_and_bind_to_descriptor_v1,
start_file_wait_v1,
end_file_wait_v1,
+ start_file_close_wait_v1,
+ end_file_close_wait_v1,
start_stage_v1,
end_stage_v1,
get_thread_statement_locker_v1,
@@ -4968,7 +5200,8 @@ PSI_v1 PFS_v1=
set_socket_info_v1,
set_socket_thread_owner_v1,
pfs_digest_start_v1,
- pfs_digest_add_token_v1
+ pfs_digest_add_token_v1,
+ set_thread_connect_attrs_v1,
};
static void* get_interface(int version)
diff --git a/storage/perfschema/pfs_account.cc b/storage/perfschema/pfs_account.cc
index 18716478681..9221fc3b991 100644
--- a/storage/perfschema/pfs_account.cc
+++ b/storage/perfschema/pfs_account.cc
@@ -45,7 +45,7 @@ static PFS_single_stat *account_instr_class_waits_array= NULL;
static PFS_stage_stat *account_instr_class_stages_array= NULL;
static PFS_statement_stat *account_instr_class_statements_array= NULL;
-static LF_HASH account_hash;
+LF_HASH account_hash;
static bool account_hash_inited= false;
/**
@@ -149,10 +149,11 @@ C_MODE_END
*/
int init_account_hash(void)
{
- if (! account_hash_inited)
+ if ((! account_hash_inited) && (account_max > 0))
{
lf_hash_init(&account_hash, sizeof(PFS_account*), LF_HASH_UNIQUE,
0, 0, account_hash_get_key, &my_charset_bin);
+ account_hash.size= account_max;
account_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_account.h b/storage/perfschema/pfs_account.h
index 77a9dfab7ba..1ac379e0fc9 100644
--- a/storage/perfschema/pfs_account.h
+++ b/storage/perfschema/pfs_account.h
@@ -46,7 +46,7 @@ struct PFS_account_key
uint m_key_length;
};
-struct PFS_account : PFS_connection_slice
+struct PFS_ALIGNED PFS_account : PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -115,6 +115,8 @@ extern ulong account_lost;
extern PFS_account *account_array;
+extern LF_HASH account_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_atomic.h b/storage/perfschema/pfs_atomic.h
index ffb4c24ecbf..61b8c2b2804 100644
--- a/storage/perfschema/pfs_atomic.h
+++ b/storage/perfschema/pfs_atomic.h
@@ -43,6 +43,16 @@ public:
}
/** Atomic load. */
+ static inline int64 load_64(volatile int64 *ptr)
+ {
+ int64 result;
+ rdlock(ptr);
+ result= my_atomic_load64(ptr);
+ rdunlock(ptr);
+ return result;
+ }
+
+ /** Atomic load. */
static inline uint32 load_u32(volatile uint32 *ptr)
{
uint32 result;
@@ -52,6 +62,16 @@ public:
return result;
}
+ /** Atomic load. */
+ static inline uint64 load_u64(volatile uint64 *ptr)
+ {
+ uint64 result;
+ rdlock(ptr);
+ result= (uint64) my_atomic_load64((int64*) ptr);
+ rdunlock(ptr);
+ return result;
+ }
+
/** Atomic store. */
static inline void store_32(volatile int32 *ptr, int32 value)
{
@@ -61,6 +81,14 @@ public:
}
/** Atomic store. */
+ static inline void store_64(volatile int64 *ptr, int64 value)
+ {
+ wrlock(ptr);
+ my_atomic_store64(ptr, value);
+ wrunlock(ptr);
+ }
+
+ /** Atomic store. */
static inline void store_u32(volatile uint32 *ptr, uint32 value)
{
wrlock(ptr);
@@ -68,6 +96,14 @@ public:
wrunlock(ptr);
}
+ /** Atomic store. */
+ static inline void store_u64(volatile uint64 *ptr, uint64 value)
+ {
+ wrlock(ptr);
+ my_atomic_store64((int64*) ptr, (int64) value);
+ wrunlock(ptr);
+ }
+
/** Atomic add. */
static inline int32 add_32(volatile int32 *ptr, int32 value)
{
@@ -79,6 +115,16 @@ public:
}
/** Atomic add. */
+ static inline int64 add_64(volatile int64 *ptr, int64 value)
+ {
+ int64 result;
+ wrlock(ptr);
+ result= my_atomic_add64(ptr, value);
+ wrunlock(ptr);
+ return result;
+ }
+
+ /** Atomic add. */
static inline uint32 add_u32(volatile uint32 *ptr, uint32 value)
{
uint32 result;
@@ -88,6 +134,16 @@ public:
return result;
}
+ /** Atomic add. */
+ static inline uint64 add_u64(volatile uint64 *ptr, uint64 value)
+ {
+ uint64 result;
+ wrlock(ptr);
+ result= (uint64) my_atomic_add64((int64*) ptr, (int64) value);
+ wrunlock(ptr);
+ return result;
+ }
+
/** Atomic compare and swap. */
static inline bool cas_32(volatile int32 *ptr, int32 *old_value,
int32 new_value)
@@ -100,6 +156,17 @@ public:
}
/** Atomic compare and swap. */
+ static inline bool cas_64(volatile int64 *ptr, int64 *old_value,
+ int64 new_value)
+ {
+ bool result;
+ wrlock(ptr);
+ result= my_atomic_cas64(ptr, old_value, new_value);
+ wrunlock(ptr);
+ return result;
+ }
+
+ /** Atomic compare and swap. */
static inline bool cas_u32(volatile uint32 *ptr, uint32 *old_value,
uint32 new_value)
{
@@ -111,6 +178,18 @@ public:
return result;
}
+ /** Atomic compare and swap. */
+ static inline bool cas_u64(volatile uint64 *ptr, uint64 *old_value,
+ uint64 new_value)
+ {
+ bool result;
+ wrlock(ptr);
+ result= my_atomic_cas64((int64*) ptr, (int64*) old_value,
+ (uint64) new_value);
+ wrunlock(ptr);
+ return result;
+ }
+
private:
static my_atomic_rwlock_t m_rwlock_array[256];
diff --git a/storage/perfschema/pfs_autosize.cc b/storage/perfschema/pfs_autosize.cc
new file mode 100644
index 00000000000..38bd36d8321
--- /dev/null
+++ b/storage/perfschema/pfs_autosize.cc
@@ -0,0 +1,366 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+/**
+ @file storage/perfschema/pfs_autosize.cc
+ Private interface for the server (implementation).
+*/
+
+#include "my_global.h"
+#include "sql_const.h"
+#include "pfs_server.h"
+
+#include <algorithm>
+using std::min;
+using std::max;
+
+static const ulong fixed_mutex_instances= 500;
+static const ulong fixed_rwlock_instances= 200;
+static const ulong fixed_cond_instances= 50;
+static const ulong fixed_file_instances= 200;
+static const ulong fixed_socket_instances= 10;
+static const ulong fixed_thread_instances= 50;
+
+static const ulong mutex_per_connection= 3;
+static const ulong rwlock_per_connection= 1;
+static const ulong cond_per_connection= 2;
+static const ulong file_per_connection= 0;
+static const ulong socket_per_connection= 1;
+static const ulong thread_per_connection= 1;
+
+static const ulong mutex_per_handle= 0;
+static const ulong rwlock_per_handle= 0;
+static const ulong cond_per_handle= 0;
+static const ulong file_per_handle= 0;
+static const ulong socket_per_handle= 0;
+static const ulong thread_per_handle= 0;
+
+static const ulong mutex_per_share= 5;
+static const ulong rwlock_per_share= 3;
+static const ulong cond_per_share= 1;
+static const ulong file_per_share= 3;
+static const ulong socket_per_share= 0;
+static const ulong thread_per_share= 0;
+
+struct PFS_sizing_data
+{
+ /** Default value for @c PFS_param.m_account_sizing. */
+ ulong m_account_sizing;
+ /** Default value for @c PFS_param.m_user_sizing. */
+ ulong m_user_sizing;
+ /** Default value for @c PFS_param.m_host_sizing. */
+ ulong m_host_sizing;
+
+ /** Default value for @c PFS_param.m_events_waits_history_sizing. */
+ ulong m_events_waits_history_sizing;
+ /** Default value for @c PFS_param.m_events_waits_history_long_sizing. */
+ ulong m_events_waits_history_long_sizing;
+ /** Default value for @c PFS_param.m_events_stages_history_sizing. */
+ ulong m_events_stages_history_sizing;
+ /** Default value for @c PFS_param.m_events_stages_history_long_sizing. */
+ ulong m_events_stages_history_long_sizing;
+ /** Default value for @c PFS_param.m_events_statements_history_sizing. */
+ ulong m_events_statements_history_sizing;
+ /** Default value for @c PFS_param.m_events_statements_history_long_sizing. */
+ ulong m_events_statements_history_long_sizing;
+ /** Default value for @c PFS_param.m_digest_sizing. */
+ ulong m_digest_sizing;
+ /** Default value for @c PFS_param.m_session_connect_attrs_sizing. */
+ ulong m_session_connect_attrs_sizing;
+
+ /**
+ Minimum number of tables to keep statistics for.
+ On small deployments, all the tables can fit into the table definition cache,
+ and this value can be 0.
+ On big deployments, the table definition cache is only a subset of all the tables
+ in the database, which are accounted for here.
+ */
+ ulong m_min_number_of_tables;
+
+ /**
+ Load factor for 'volatile' objects (mutexes, table handles, ...).
+ Instrumented objects that:
+ - use little memory
+ - are created/destroyed very frequently
+ should be stored in a low density (mostly empty) memory buffer,
+ to optimize for speed.
+ */
+ float m_load_factor_volatile;
+ /**
+ Load factor for 'normal' objects (files).
+ Instrumented objects that:
+ - use a medium amount of memory
+ - are created/destroyed
+ should be stored in a medium density memory buffer,
+ as a trade off between space and speed.
+ */
+ float m_load_factor_normal;
+ /**
+ Load factor for 'static' objects (table shares).
+ Instrumented objects that:
+ - use a lot of memory
+ - are created/destroyed very rarely
+ can be stored in a high density (mostly packed) memory buffer,
+ to optimize for space.
+ */
+ float m_load_factor_static;
+};
+
+PFS_sizing_data small_data=
+{
+ /* Account / user / host */
+ 10, 5, 20,
+ /* History sizes */
+ 5, 100, 5, 100, 5, 100,
+ /* Digests */
+ 1000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 200,
+ /* Load factors */
+ 0.90, 0.90, 0.90
+};
+
+PFS_sizing_data medium_data=
+{
+ /* Account / user / host */
+ 100, 100, 100,
+ /* History sizes */
+ 10, 1000, 10, 1000, 10, 1000,
+ /* Digests */
+ 5000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 500,
+ /* Load factors */
+ 0.70, 0.80, 0.90
+};
+
+PFS_sizing_data large_data=
+{
+ /* Account / user / host */
+ 100, 100, 100,
+ /* History sizes */
+ 10, 10000, 10, 10000, 10, 10000,
+ /* Digests */
+ 10000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 10000,
+ /* Load factors */
+ 0.50, 0.65, 0.80
+};
+
+static inline ulong apply_load_factor(ulong raw_value, float factor)
+{
+ float value = ((float) raw_value) / factor;
+ return (ulong) ceil(value);
+}
+
+PFS_sizing_data *estimate_hints(PFS_global_param *param)
+{
+ if ((param->m_hints.m_max_connections <= MAX_CONNECTIONS_DEFAULT) &&
+ (param->m_hints.m_table_definition_cache <= TABLE_DEF_CACHE_DEFAULT) &&
+ (param->m_hints.m_table_open_cache <= TABLE_OPEN_CACHE_DEFAULT))
+ {
+ /* The my.cnf used is either unchanged, or lower than factory defaults. */
+ return & small_data;
+ }
+
+ if ((param->m_hints.m_max_connections <= MAX_CONNECTIONS_DEFAULT * 2) &&
+ (param->m_hints.m_table_definition_cache <= TABLE_DEF_CACHE_DEFAULT * 2) &&
+ (param->m_hints.m_table_open_cache <= TABLE_OPEN_CACHE_DEFAULT * 2))
+ {
+ /* Some defaults have been increased, to "moderate" values. */
+ return & medium_data;
+ }
+
+ /* Looks like a server in production. */
+ return & large_data;
+}
+
+static void apply_heuristic(PFS_global_param *p, PFS_sizing_data *h)
+{
+ ulong count;
+ ulong con = p->m_hints.m_max_connections;
+ ulong handle = p->m_hints.m_table_open_cache;
+ ulong share = p->m_hints.m_table_definition_cache;
+ ulong file = p->m_hints.m_open_files_limit;
+
+ if (p->m_table_sizing < 0)
+ {
+ count= handle;
+
+ p->m_table_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_table_share_sizing < 0)
+ {
+ count= share;
+
+ count= max<ulong>(count, h->m_min_number_of_tables);
+ p->m_table_share_sizing= apply_load_factor(count, h->m_load_factor_static);
+ }
+
+ if (p->m_account_sizing < 0)
+ {
+ p->m_account_sizing= h->m_account_sizing;
+ }
+
+ if (p->m_user_sizing < 0)
+ {
+ p->m_user_sizing= h->m_user_sizing;
+ }
+
+ if (p->m_host_sizing < 0)
+ {
+ p->m_host_sizing= h->m_host_sizing;
+ }
+
+ if (p->m_events_waits_history_sizing < 0)
+ {
+ p->m_events_waits_history_sizing= h->m_events_waits_history_sizing;
+ }
+
+ if (p->m_events_waits_history_long_sizing < 0)
+ {
+ p->m_events_waits_history_long_sizing= h->m_events_waits_history_long_sizing;
+ }
+
+ if (p->m_events_stages_history_sizing < 0)
+ {
+ p->m_events_stages_history_sizing= h->m_events_stages_history_sizing;
+ }
+
+ if (p->m_events_stages_history_long_sizing < 0)
+ {
+ p->m_events_stages_history_long_sizing= h->m_events_stages_history_long_sizing;
+ }
+
+ if (p->m_events_statements_history_sizing < 0)
+ {
+ p->m_events_statements_history_sizing= h->m_events_statements_history_sizing;
+ }
+
+ if (p->m_events_statements_history_long_sizing < 0)
+ {
+ p->m_events_statements_history_long_sizing= h->m_events_statements_history_long_sizing;
+ }
+
+ if (p->m_digest_sizing < 0)
+ {
+ p->m_digest_sizing= h->m_digest_sizing;
+ }
+
+ if (p->m_session_connect_attrs_sizing < 0)
+ {
+ p->m_session_connect_attrs_sizing= h->m_session_connect_attrs_sizing;
+ }
+
+ if (p->m_mutex_sizing < 0)
+ {
+ count= fixed_mutex_instances
+ + con * mutex_per_connection
+ + handle * mutex_per_handle
+ + share * mutex_per_share;
+
+ p->m_mutex_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_rwlock_sizing < 0)
+ {
+ count= fixed_rwlock_instances
+ + con * rwlock_per_connection
+ + handle * rwlock_per_handle
+ + share * rwlock_per_share;
+
+ p->m_rwlock_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_cond_sizing < 0)
+ {
+ ulong count;
+ count= fixed_cond_instances
+ + con * cond_per_connection
+ + handle * cond_per_handle
+ + share * cond_per_share;
+
+ p->m_cond_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_file_sizing < 0)
+ {
+ count= fixed_file_instances
+ + con * file_per_connection
+ + handle * file_per_handle
+ + share * file_per_share;
+
+ count= max<ulong>(count, file);
+ p->m_file_sizing= apply_load_factor(count, h->m_load_factor_normal);
+ }
+
+ if (p->m_socket_sizing < 0)
+ {
+ count= fixed_socket_instances
+ + con * socket_per_connection
+ + handle * socket_per_handle
+ + share * socket_per_share;
+
+ p->m_socket_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_thread_sizing < 0)
+ {
+ count= fixed_thread_instances
+ + con * thread_per_connection
+ + handle * thread_per_handle
+ + share * thread_per_share;
+
+ p->m_thread_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+}
+
+void pfs_automated_sizing(PFS_global_param *param)
+{
+ PFS_sizing_data *heuristic;
+ heuristic= estimate_hints(param);
+ apply_heuristic(param, heuristic);
+
+ DBUG_ASSERT(param->m_account_sizing >= 0);
+ DBUG_ASSERT(param->m_digest_sizing >= 0);
+ DBUG_ASSERT(param->m_host_sizing >= 0);
+ DBUG_ASSERT(param->m_user_sizing >= 0);
+
+ DBUG_ASSERT(param->m_events_waits_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_waits_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_events_stages_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_stages_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_events_statements_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_statements_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_session_connect_attrs_sizing >= 0);
+
+ DBUG_ASSERT(param->m_mutex_sizing >= 0);
+ DBUG_ASSERT(param->m_rwlock_sizing >= 0);
+ DBUG_ASSERT(param->m_cond_sizing >= 0);
+ DBUG_ASSERT(param->m_file_sizing >= 0);
+ DBUG_ASSERT(param->m_socket_sizing >= 0);
+ DBUG_ASSERT(param->m_thread_sizing >= 0);
+ DBUG_ASSERT(param->m_table_sizing >= 0);
+ DBUG_ASSERT(param->m_table_share_sizing >= 0);
+}
+
diff --git a/storage/perfschema/pfs_digest.cc b/storage/perfschema/pfs_digest.cc
index 92c27b2e85f..c5df64d9243 100644
--- a/storage/perfschema/pfs_digest.cc
+++ b/storage/perfschema/pfs_digest.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
#include "table_helper.h"
#include "my_md5.h"
#include "sql_lex.h"
+#include "sql_string.h"
#include <string.h>
/* Generated code */
@@ -58,7 +59,6 @@
ulong digest_max= 0;
ulong digest_lost= 0;
-
/** EVENTS_STATEMENTS_HISTORY_LONG circular buffer. */
PFS_statements_digest_stat *statements_digest_stat_array= NULL;
/** Consumer flag for table EVENTS_STATEMENTS_SUMMARY_BY_DIGEST. */
@@ -69,7 +69,7 @@ bool flag_statements_digest= true;
*/
volatile uint32 digest_index= 1;
-static LF_HASH digest_hash;
+LF_HASH digest_hash;
static bool digest_hash_inited= false;
/**
@@ -123,8 +123,8 @@ static uchar *digest_hash_get_key(const uchar *entry, size_t *length,
DBUG_ASSERT(typed_entry != NULL);
digest= *typed_entry;
DBUG_ASSERT(digest != NULL);
- *length= PFS_MD5_SIZE;
- result= digest->m_digest_hash.m_md5;
+ *length= sizeof (PFS_digest_key);
+ result= & digest->m_digest_key;
return const_cast<uchar*> (reinterpret_cast<const uchar*> (result));
}
C_MODE_END
@@ -136,11 +136,12 @@ C_MODE_END
*/
int init_digest_hash(void)
{
- if (! digest_hash_inited)
+ if ((! digest_hash_inited) && (digest_max > 0))
{
lf_hash_init(&digest_hash, sizeof(PFS_statements_digest_stat*),
LF_HASH_UNIQUE, 0, 0, digest_hash_get_key,
&my_charset_bin);
+ digest_hash.size= digest_max;
digest_hash_inited= true;
}
return 0;
@@ -167,8 +168,10 @@ static LF_PINS* get_digest_hash_pins(PFS_thread *thread)
}
PFS_statement_stat*
-find_or_create_digest(PFS_thread* thread,
- PSI_digest_storage* digest_storage)
+find_or_create_digest(PFS_thread *thread,
+ PSI_digest_storage *digest_storage,
+ const char *schema_name,
+ uint schema_name_length)
{
if (statements_digest_stat_array == NULL)
return NULL;
@@ -180,13 +183,21 @@ find_or_create_digest(PFS_thread* thread,
if (unlikely(pins == NULL))
return NULL;
+ /*
+ Note: the LF_HASH key is a block of memory,
+ make sure to clean unused bytes,
+ so that memcmp() can compare keys.
+ */
+ PFS_digest_key hash_key;
+ memset(& hash_key, 0, sizeof(hash_key));
/* Compute MD5 Hash of the tokens received. */
- PFS_digest_hash md5;
- compute_md5_hash((char *) md5.m_md5,
+ compute_md5_hash((char *) hash_key.m_md5,
(char *) digest_storage->m_token_array,
digest_storage->m_byte_count);
-
- unsigned char* hash_key= md5.m_md5;
+ /* Add the current schema to the key */
+ hash_key.m_schema_name_length= schema_name_length;
+ if (schema_name_length > 0)
+ memcpy(hash_key.m_schema_name, schema_name, schema_name_length);
int res;
ulong safe_index;
@@ -202,7 +213,7 @@ search:
/* Lookup LF_HASH using this new key. */
entry= reinterpret_cast<PFS_statements_digest_stat**>
(lf_hash_search(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE));
+ &hash_key, sizeof(PFS_digest_key)));
if (entry && (entry != MY_ERRPTR))
{
@@ -244,7 +255,7 @@ search:
pfs= &statements_digest_stat_array[safe_index];
/* Copy digest hash/LF Hash search key. */
- memcpy(pfs->m_digest_hash.m_md5, md5.m_md5, PFS_MD5_SIZE);
+ memcpy(& pfs->m_digest_key, &hash_key, sizeof(PFS_digest_key));
/*
Copy digest storage to statement_digest_stat_array so that it could be
@@ -278,7 +289,7 @@ search:
return NULL;
}
-void purge_digest(PFS_thread* thread, unsigned char* hash_key)
+void purge_digest(PFS_thread* thread, PFS_digest_key *hash_key)
{
LF_PINS *pins= get_digest_hash_pins(thread);
if (unlikely(pins == NULL))
@@ -289,12 +300,12 @@ void purge_digest(PFS_thread* thread, unsigned char* hash_key)
/* Lookup LF_HASH using this new key. */
entry= reinterpret_cast<PFS_statements_digest_stat**>
(lf_hash_search(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE));
+ hash_key, sizeof(PFS_digest_key)));
if (entry && (entry != MY_ERRPTR))
- {
+ {
lf_hash_delete(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE);
+ hash_key, sizeof(PFS_digest_key));
}
lf_hash_search_unpin(pins);
return;
@@ -313,7 +324,7 @@ void PFS_statements_digest_stat::reset_index(PFS_thread *thread)
/* Only remove entries that exists in the HASH index. */
if (m_digest_storage.m_byte_count > 0)
{
- purge_digest(thread, m_digest_hash.m_md5);
+ purge_digest(thread, & m_digest_key);
}
}
@@ -347,98 +358,130 @@ void reset_esms_by_digest()
*/
void get_digest_text(char* digest_text, PSI_digest_storage* digest_storage)
{
+ DBUG_ASSERT(digest_storage != NULL);
bool truncated= false;
int byte_count= digest_storage->m_byte_count;
- int need_bytes;
+ int bytes_needed= 0;
uint tok= 0;
- char *id_string;
- int id_length;
int current_byte= 0;
lex_token_string *tok_data;
/* -4 is to make sure extra space for '...' and a '\0' at the end. */
- int available_bytes_to_write= COL_DIGEST_TEXT_SIZE - 4;
+ int bytes_available= COL_DIGEST_TEXT_SIZE - 4;
+
+ /* Convert text to utf8 */
+ const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0));
+ const CHARSET_INFO *to_cs= &my_charset_utf8_bin;
+
+ if (from_cs == NULL)
+ {
+ /*
+ Can happen, as we do dirty reads on digest_storage,
+ which can be written to in another thread.
+ */
+ *digest_text= '\0';
+ return;
+ }
+
+ /*
+ Max converted size is number of characters * max multibyte length of the
+ target charset, which is 4 for UTF8.
+ */
+ const uint max_converted_size= PSI_MAX_DIGEST_STORAGE_SIZE * 4;
+ char id_buffer[max_converted_size];
+ char *id_string;
+ int id_length;
+ bool convert_text= !my_charset_same(from_cs, to_cs);
DBUG_ASSERT(byte_count <= PSI_MAX_DIGEST_STORAGE_SIZE);
while ((current_byte < byte_count) &&
- (available_bytes_to_write > 0) &&
- (! truncated))
+ (bytes_available > 0) &&
+ !truncated)
{
current_byte= read_token(digest_storage, current_byte, &tok);
- tok_data= & lex_token_array[tok];
+ tok_data= &lex_token_array[tok];
switch (tok)
{
/* All identifiers are printed with their name. */
case IDENT:
- current_byte= read_identifier(digest_storage, current_byte,
- & id_string, & id_length);
- need_bytes= id_length + 1; /* <id> space */
- if (need_bytes <= available_bytes_to_write)
+ case IDENT_QUOTED:
{
- if (id_length > 0)
+ char *id_ptr;
+ int id_len;
+ uint err_cs= 0;
+
+ /* Get the next identifier from the storage buffer. */
+ current_byte= read_identifier(digest_storage, current_byte,
+ &id_ptr, &id_len);
+ if (convert_text)
{
- strncpy(digest_text, id_string, id_length);
- digest_text+= id_length;
+ /* Verify that the converted text will fit. */
+ if (to_cs->mbmaxlen*id_len > max_converted_size)
+ {
+ truncated= true;
+ break;
+ }
+ /* Convert identifier string into the storage character set. */
+ id_length= my_convert(id_buffer, max_converted_size, to_cs,
+ id_ptr, id_len, from_cs, &err_cs);
+ id_string= id_buffer;
}
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
- }
- else
- {
- truncated= true;
- }
- break;
- case IDENT_QUOTED:
- current_byte= read_identifier(digest_storage, current_byte,
- & id_string, & id_length);
- need_bytes= id_length + 3; /* quote <id> quote space */
- if (need_bytes <= available_bytes_to_write)
- {
- *digest_text= '`';
- digest_text++;
- if (id_length > 0)
+ else
{
- strncpy(digest_text, id_string, id_length);
- digest_text+= id_length;
+ id_string= id_ptr;
+ id_length= id_len;
+ }
+
+ if (id_length == 0 || err_cs != 0)
+ {
+ truncated= true;
+ break;
+ }
+ /* Copy the converted identifier into the digest string. */
+ bytes_needed= id_length + (tok == IDENT ? 1 : 3);
+ if (bytes_needed <= bytes_available)
+ {
+ if (tok == IDENT_QUOTED)
+ *digest_text++= '`';
+ if (id_length > 0)
+ {
+ memcpy(digest_text, id_string, id_length);
+ digest_text+= id_length;
+ }
+ if (tok == IDENT_QUOTED)
+ *digest_text++= '`';
+ *digest_text++= ' ';
+ bytes_available-= bytes_needed;
+ }
+ else
+ {
+ truncated= true;
}
- *digest_text= '`';
- digest_text++;
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
- }
- else
- {
- truncated= true;
}
break;
/* Everything else is printed as is. */
default:
/*
- Make sure not to overflow digest_text buffer while writing
- this token string.
+ Make sure not to overflow digest_text buffer.
+1 is to make sure extra space for ' '.
*/
int tok_length= tok_data->m_token_length;
- need_bytes= tok_length + 1;
+ bytes_needed= tok_length + 1;
- if (need_bytes <= available_bytes_to_write)
+ if (bytes_needed <= bytes_available)
{
- strncpy(digest_text,
- tok_data->m_token_string,
- tok_length);
+ strncpy(digest_text, tok_data->m_token_string, tok_length);
digest_text+= tok_length;
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
+ *digest_text++= ' ';
+ bytes_available-= bytes_needed;
}
else
{
truncated= true;
}
+ break;
}
}
@@ -524,7 +567,11 @@ PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
digest_storage= &state->m_digest_storage;
- if (digest_storage->m_full)
+ /*
+ Stop collecting further tokens if digest storage is full or
+ if END token is received.
+ */
+ if (digest_storage->m_full || token == END_OF_INPUT)
return NULL;
/*
@@ -555,19 +602,23 @@ PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
TOK_PFS_GENERIC_VALUE := BIN_NUM | DECIMAL_NUM | ... | ULONGLONG_NUM
*/
token= TOK_PFS_GENERIC_VALUE;
-
+ }
+ /* fall through */
+ case NULL_SYM:
+ {
if ((last_token2 == TOK_PFS_GENERIC_VALUE ||
- last_token2 == TOK_PFS_GENERIC_VALUE_LIST) &&
+ last_token2 == TOK_PFS_GENERIC_VALUE_LIST ||
+ last_token2 == NULL_SYM) &&
(last_token == ','))
{
/*
REDUCE:
TOK_PFS_GENERIC_VALUE_LIST :=
- TOK_PFS_GENERIC_VALUE ',' TOK_PFS_GENERIC_VALUE
+ (TOK_PFS_GENERIC_VALUE|NULL_SYM) ',' (TOK_PFS_GENERIC_VALUE|NULL_SYM)
REDUCE:
TOK_PFS_GENERIC_VALUE_LIST :=
- TOK_PFS_GENERIC_VALUE_LIST ',' TOK_PFS_GENERIC_VALUE
+ TOK_PFS_GENERIC_VALUE_LIST ',' (TOK_PFS_GENERIC_VALUE|NULL_SYM)
*/
digest_storage->m_byte_count-= 2*PFS_SIZE_OF_A_TOKEN;
token= TOK_PFS_GENERIC_VALUE_LIST;
diff --git a/storage/perfschema/pfs_digest.h b/storage/perfschema/pfs_digest.h
index 2646596171c..d2453dc32c6 100644
--- a/storage/perfschema/pfs_digest.h
+++ b/storage/perfschema/pfs_digest.h
@@ -38,32 +38,26 @@ struct PFS_thread;
/**
Structure to store a MD5 hash value (digest) for a statement.
*/
-struct PFS_digest_hash
+struct PFS_digest_key
{
unsigned char m_md5[PFS_MD5_SIZE];
+ char m_schema_name[NAME_LEN];
+ uint m_schema_name_length;
};
/** A statement digest stat record. */
-struct PFS_statements_digest_stat
+struct PFS_ALIGNED PFS_statements_digest_stat
{
- /**
- Digest MD5 Hash.
- */
- PFS_digest_hash m_digest_hash;
+ /** Digest Schema + MD5 Hash. */
+ PFS_digest_key m_digest_key;
- /**
- Digest Storage.
- */
+ /** Digest Storage. */
PSI_digest_storage m_digest_storage;
- /**
- Statement stat.
- */
+ /** Statement stat. */
PFS_statement_stat m_stat;
- /**
- First Seen/last seen.
- */
+ /** First and last seen timestamps.*/
ulonglong m_first_seen;
ulonglong m_last_seen;
@@ -78,10 +72,12 @@ void cleanup_digest();
int init_digest_hash(void);
void cleanup_digest_hash(void);
-PFS_statement_stat* find_or_create_digest(PFS_thread*,
- PSI_digest_storage*);
+PFS_statement_stat* find_or_create_digest(PFS_thread *thread,
+ PSI_digest_storage *digest_storage,
+ const char *schema_name,
+ uint schema_name_length);
-void get_digest_text(char* digest_text, PSI_digest_storage*);
+void get_digest_text(char *digest_text, PSI_digest_storage *digest_storage);
void reset_esms_by_digest();
@@ -90,8 +86,8 @@ extern PFS_statements_digest_stat *statements_digest_stat_array;
/* Instrumentation callbacks for pfs.cc */
-struct PSI_digest_locker* pfs_digest_start_v1(PSI_statement_locker *locker);
-PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
+struct PSI_digest_locker *pfs_digest_start_v1(PSI_statement_locker *locker);
+PSI_digest_locker *pfs_digest_add_token_v1(PSI_digest_locker *locker,
uint token,
OPAQUE_LEX_YYSTYPE *yylval);
@@ -99,6 +95,7 @@ static inline void digest_reset(PSI_digest_storage *digest)
{
digest->m_full= false;
digest->m_byte_count= 0;
+ digest->m_charset_number= 0;
}
static inline void digest_copy(PSI_digest_storage *to, const PSI_digest_storage *from)
@@ -107,20 +104,21 @@ static inline void digest_copy(PSI_digest_storage *to, const PSI_digest_storage
{
to->m_full= from->m_full;
to->m_byte_count= from->m_byte_count;
+ to->m_charset_number= from->m_charset_number;
DBUG_ASSERT(to->m_byte_count <= PSI_MAX_DIGEST_STORAGE_SIZE);
memcpy(to->m_token_array, from->m_token_array, to->m_byte_count);
}
else
{
- DBUG_ASSERT(! from->m_full);
DBUG_ASSERT(from->m_byte_count == 0);
to->m_full= false;
to->m_byte_count= 0;
+ to->m_charset_number= 0;
}
}
/**
- Function to read a single token from token array.
+ Read a single token from token array.
*/
inline int read_token(PSI_digest_storage *digest_storage,
int index, uint *tok)
@@ -141,7 +139,7 @@ inline int read_token(PSI_digest_storage *digest_storage,
}
/**
- Function to store a single token in token array.
+ Store a single token in token array.
*/
inline void store_token(PSI_digest_storage* digest_storage, uint token)
{
@@ -162,7 +160,7 @@ inline void store_token(PSI_digest_storage* digest_storage, uint token)
}
/**
- Function to read an identifier from token array.
+ Read an identifier from token array.
*/
inline int read_identifier(PSI_digest_storage* digest_storage,
int index, char ** id_string, int *id_length)
@@ -186,7 +184,7 @@ inline int read_identifier(PSI_digest_storage* digest_storage,
}
/**
- Function to store an identifier in token array.
+ Store an identifier in token array.
*/
inline void store_token_identifier(PSI_digest_storage* digest_storage,
uint token,
@@ -207,9 +205,7 @@ inline void store_token_identifier(PSI_digest_storage* digest_storage,
dest[3]= (id_length >> 8) & 0xff;
/* Write the string data */
if (id_length > 0)
- {
- strncpy((char *)(dest + 4), id_name, id_length);
- }
+ memcpy((char *)(dest + 4), id_name, id_length);
digest_storage->m_byte_count+= bytes_needed;
}
else
@@ -218,4 +214,6 @@ inline void store_token_identifier(PSI_digest_storage* digest_storage,
}
}
+extern LF_HASH digest_hash;
+
#endif
diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc
index 304e837fa84..8f6f0fa3bcd 100644
--- a/storage/perfschema/pfs_engine_table.cc
+++ b/storage/perfschema/pfs_engine_table.cc
@@ -20,6 +20,7 @@
#include "my_global.h"
#include "my_pthread.h"
+#include "hostname.h" /* For Host_entry */
#include "pfs_engine_table.h"
#include "table_events_waits.h"
@@ -69,6 +70,8 @@
#include "table_socket_instances.h"
#include "table_socket_summary_by_instance.h"
#include "table_socket_summary_by_event_name.h"
+#include "table_session_connect_attrs.h"
+#include "table_session_account_connect_attrs.h"
/* For show status */
#include "pfs_column_values.h"
@@ -145,6 +148,8 @@ static PFS_engine_table_share *all_shares[]=
&table_socket_instances::m_share,
&table_socket_summary_by_instance::m_share,
&table_socket_summary_by_event_name::m_share,
+ &table_session_connect_attrs::m_share,
+ &table_session_account_connect_attrs::m_share,
NULL
};
@@ -683,20 +688,22 @@ PFS_unknown_acl pfs_unknown_acl;
ACL_internal_access_result
PFS_unknown_acl::check(ulong want_access, ulong *save_priv) const
{
- const ulong always_forbidden= INSERT_ACL | UPDATE_ACL | DELETE_ACL
- | CREATE_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL
- | CREATE_VIEW_ACL | TRIGGER_ACL | LOCK_TABLES_ACL;
+ const ulong always_forbidden= CREATE_ACL
+ | REFERENCES_ACL | INDEX_ACL | ALTER_ACL
+ | CREATE_VIEW_ACL | TRIGGER_ACL;
if (unlikely(want_access & always_forbidden))
return ACL_INTERNAL_ACCESS_DENIED;
/*
- There is no point in hidding (by enforcing ACCESS_DENIED for SELECT_ACL
+ There is no point in hiding (by enforcing ACCESS_DENIED for SELECT_ACL
on performance_schema.*) tables that do not exist anyway.
When SELECT_ACL is granted on performance_schema.* or *.*,
SELECT * from performance_schema.wrong_table
will fail with a more understandable ER_NO_SUCH_TABLE error,
instead of ER_TABLEACCESS_DENIED_ERROR.
+ The same goes for other DML (INSERT_ACL | UPDATE_ACL | DELETE_ACL),
+ for ease of use: error messages will be less surprising.
*/
return ACL_INTERNAL_ACCESS_CHECK_GRANT;
}
@@ -978,363 +985,445 @@ bool pfs_show_status(handlerton *hton, THD *thd,
total_memory+= size;
break;
case 56:
- name= "events_waits_summary_global_by_event_name.row_size";
- size= sizeof(PFS_single_stat);
- break;
- case 57:
- name= "events_waits_summary_global_by_event_name.row_count";
- size= wait_class_max;
- break;
- case 58:
- name= "events_waits_summary_global_by_event_name.memory";
- size= wait_class_max * sizeof(PFS_single_stat);
- total_memory+= size;
- break;
- case 59:
name= "(pfs_account).row_size";
size= sizeof(PFS_account);
break;
- case 60:
+ case 57:
name= "(pfs_account).row_count";
size= account_max;
break;
- case 61:
+ case 58:
name= "(pfs_account).memory";
size= account_max * sizeof(PFS_account);
total_memory+= size;
break;
- case 62:
+ case 59:
name= "events_waits_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 63:
+ case 60:
name= "events_waits_summary_by_account_by_event_name.row_count";
size= account_max * wait_class_max;
break;
- case 64:
+ case 61:
name= "events_waits_summary_by_account_by_event_name.memory";
size= account_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 65:
+ case 62:
name= "events_waits_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 66:
+ case 63:
name= "events_waits_summary_by_user_by_event_name.row_count";
size= user_max * wait_class_max;
break;
- case 67:
+ case 64:
name= "events_waits_summary_by_user_by_event_name.memory";
size= user_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 68:
+ case 65:
name= "events_waits_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 69:
+ case 66:
name= "events_waits_summary_by_host_by_event_name.row_count";
size= host_max * wait_class_max;
break;
- case 70:
+ case 67:
name= "events_waits_summary_by_host_by_event_name.memory";
size= host_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 71:
+ case 68:
name= "(pfs_user).row_size";
size= sizeof(PFS_user);
break;
- case 72:
+ case 69:
name= "(pfs_user).row_count";
size= user_max;
break;
- case 73:
+ case 70:
name= "(pfs_user).memory";
size= user_max * sizeof(PFS_user);
total_memory+= size;
break;
- case 74:
+ case 71:
name= "(pfs_host).row_size";
size= sizeof(PFS_host);
break;
- case 75:
+ case 72:
name= "(pfs_host).row_count";
size= host_max;
break;
- case 76:
+ case 73:
name= "(pfs_host).memory";
size= host_max * sizeof(PFS_host);
total_memory+= size;
break;
- case 77:
+ case 74:
name= "(pfs_stage_class).row_size";
size= sizeof(PFS_stage_class);
break;
- case 78:
+ case 75:
name= "(pfs_stage_class).row_count";
size= stage_class_max;
break;
- case 79:
+ case 76:
name= "(pfs_stage_class).memory";
size= stage_class_max * sizeof(PFS_stage_class);
total_memory+= size;
break;
- case 80:
+ case 77:
name= "events_stages_history.row_size";
size= sizeof(PFS_events_stages);
break;
- case 81:
+ case 78:
name= "events_stages_history.row_count";
size= events_stages_history_per_thread * thread_max;
break;
- case 82:
+ case 79:
name= "events_stages_history.memory";
size= events_stages_history_per_thread * thread_max
* sizeof(PFS_events_stages);
total_memory+= size;
break;
- case 83:
+ case 80:
name= "events_stages_history_long.row_size";
size= sizeof(PFS_events_stages);
break;
- case 84:
+ case 81:
name= "events_stages_history_long.row_count";
size= events_stages_history_long_size;
break;
- case 85:
+ case 82:
name= "events_stages_history_long.memory";
size= events_stages_history_long_size * sizeof(PFS_events_stages);
total_memory+= size;
break;
- case 86:
+ case 83:
name= "events_stages_summary_by_thread_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 87:
+ case 84:
name= "events_stages_summary_by_thread_by_event_name.row_count";
size= thread_max * stage_class_max;
break;
- case 88:
+ case 85:
name= "events_stages_summary_by_thread_by_event_name.memory";
size= thread_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 89:
+ case 86:
name= "events_stages_summary_global_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 90:
+ case 87:
name= "events_stages_summary_global_by_event_name.row_count";
size= stage_class_max;
break;
- case 91:
+ case 88:
name= "events_stages_summary_global_by_event_name.memory";
size= stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 92:
+ case 89:
name= "events_stages_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 93:
+ case 90:
name= "events_stages_summary_by_account_by_event_name.row_count";
size= account_max * stage_class_max;
break;
- case 94:
+ case 91:
name= "events_stages_summary_by_account_by_event_name.memory";
size= account_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 95:
+ case 92:
name= "events_stages_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 96:
+ case 93:
name= "events_stages_summary_by_user_by_event_name.row_count";
size= user_max * stage_class_max;
break;
- case 97:
+ case 94:
name= "events_stages_summary_by_user_by_event_name.memory";
size= user_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 98:
+ case 95:
name= "events_stages_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 99:
+ case 96:
name= "events_stages_summary_by_host_by_event_name.row_count";
size= host_max * stage_class_max;
break;
- case 100:
+ case 97:
name= "events_stages_summary_by_host_by_event_name.memory";
size= host_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 101:
+ case 98:
name= "(pfs_statement_class).row_size";
size= sizeof(PFS_statement_class);
break;
- case 102:
+ case 99:
name= "(pfs_statement_class).row_count";
size= statement_class_max;
break;
- case 103:
+ case 100:
name= "(pfs_statement_class).memory";
size= statement_class_max * sizeof(PFS_statement_class);
total_memory+= size;
break;
- case 104:
+ case 101:
name= "events_statements_history.row_size";
size= sizeof(PFS_events_statements);
break;
- case 105:
+ case 102:
name= "events_statements_history.row_count";
size= events_statements_history_per_thread * thread_max;
break;
- case 106:
+ case 103:
name= "events_statements_history.memory";
size= events_statements_history_per_thread * thread_max
* sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 107:
+ case 104:
name= "events_statements_history_long.row_size";
size= sizeof(PFS_events_statements);
break;
- case 108:
+ case 105:
name= "events_statements_history_long.row_count";
size= events_statements_history_long_size;
break;
- case 109:
+ case 106:
name= "events_statements_history_long.memory";
size= events_statements_history_long_size * sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 110:
+ case 107:
name= "events_statements_summary_by_thread_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 111:
+ case 108:
name= "events_statements_summary_by_thread_by_event_name.row_count";
size= thread_max * statement_class_max;
break;
- case 112:
+ case 109:
name= "events_statements_summary_by_thread_by_event_name.memory";
size= thread_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 113:
+ case 110:
name= "events_statements_summary_global_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 114:
+ case 111:
name= "events_statements_summary_global_by_event_name.row_count";
size= statement_class_max;
break;
- case 115:
+ case 112:
name= "events_statements_summary_global_by_event_name.memory";
size= statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 116:
+ case 113:
name= "events_statements_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 117:
+ case 114:
name= "events_statements_summary_by_account_by_event_name.row_count";
size= account_max * statement_class_max;
break;
- case 118:
+ case 115:
name= "events_statements_summary_by_account_by_event_name.memory";
size= account_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 119:
+ case 116:
name= "events_statements_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 120:
+ case 117:
name= "events_statements_summary_by_user_by_event_name.row_count";
size= user_max * statement_class_max;
break;
- case 121:
+ case 118:
name= "events_statements_summary_by_user_by_event_name.memory";
size= user_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 122:
+ case 119:
name= "events_statements_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 123:
+ case 120:
name= "events_statements_summary_by_host_by_event_name.row_count";
size= host_max * statement_class_max;
break;
- case 124:
+ case 121:
name= "events_statements_summary_by_host_by_event_name.memory";
size= host_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 125:
+ case 122:
name= "events_statements_current.row_size";
size= sizeof(PFS_events_statements);
break;
- case 126:
+ case 123:
name= "events_statements_current.row_count";
size= thread_max * statement_stack_max;
break;
- case 127:
+ case 124:
name= "events_statements_current.memory";
size= thread_max * statement_stack_max * sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 128:
+ case 125:
name= "(pfs_socket_class).row_size";
size= sizeof(PFS_socket_class);
break;
- case 129:
+ case 126:
name= "(pfs_socket_class).row_count";
size= socket_class_max;
break;
- case 130:
+ case 127:
name= "(pfs_socket_class).memory";
size= socket_class_max * sizeof(PFS_socket_class);
total_memory+= size;
break;
- case 131:
+ case 128:
name= "socket_instances.row_size";
size= sizeof(PFS_socket);
break;
- case 132:
+ case 129:
name= "socket_instances.row_count";
size= socket_max;
break;
- case 133:
+ case 130:
name= "socket_instances.memory";
size= socket_max * sizeof(PFS_socket);
total_memory+= size;
break;
- case 134:
+ case 131:
name= "events_statements_summary_by_digest.row_size";
size= sizeof(PFS_statements_digest_stat);
break;
- case 135:
+ case 132:
name= "events_statements_summary_by_digest.row_count";
size= digest_max;
break;
- case 136:
+ case 133:
name= "events_statements_summary_by_digest.memory";
size= digest_max * sizeof(PFS_statements_digest_stat);
total_memory+= size;
- break;
+ break;
+ case 134:
+ name= "session_connect_attrs.row_size";
+ size= thread_max;
+ break;
+ case 135:
+ name= "session_connect_attrs.row_count";
+ size= session_connect_attrs_size_per_thread;
+ break;
+ case 136:
+ name= "session_connect_attrs.memory";
+ size= thread_max * session_connect_attrs_size_per_thread;
+ total_memory+= size;
+ break;
+
+ case 137:
+ name= "(account_hash).count";
+ size= account_hash.count;
+ break;
+ case 138:
+ name= "(account_hash).size";
+ size= account_hash.size;
+ break;
+ case 139:
+ name= "(digest_hash).count";
+ size= digest_hash.count;
+ break;
+ case 140:
+ name= "(digest_hash).size";
+ size= digest_hash.size;
+ break;
+ case 141:
+ name= "(filename_hash).count";
+ size= filename_hash.count;
+ break;
+ case 142:
+ name= "(filename_hash).size";
+ size= filename_hash.size;
+ break;
+ case 143:
+ name= "(host_hash).count";
+ size= host_hash.count;
+ break;
+ case 144:
+ name= "(host_hash).size";
+ size= host_hash.size;
+ break;
+ case 145:
+ name= "(setup_actor_hash).count";
+ size= setup_actor_hash.count;
+ break;
+ case 146:
+ name= "(setup_actor_hash).size";
+ size= setup_actor_hash.size;
+ break;
+ case 147:
+ name= "(setup_object_hash).count";
+ size= setup_object_hash.count;
+ break;
+ case 148:
+ name= "(setup_object_hash).size";
+ size= setup_object_hash.size;
+ break;
+ case 149:
+ name= "(table_share_hash).count";
+ size= table_share_hash.count;
+ break;
+ case 150:
+ name= "(table_share_hash).size";
+ size= table_share_hash.size;
+ break;
+ case 151:
+ name= "(user_hash).count";
+ size= user_hash.count;
+ break;
+ case 152:
+ name= "(user_hash).size";
+ size= user_hash.size;
+ break;
+ case 153:
+ /*
+ This is not a performance_schema buffer,
+ the data is maintained in the server,
+ in hostname_cache.
+ Print the size only, there are:
+ - no host_cache.count
+ - no host_cache.memory
+ */
+ name= "host_cache.size";
+#ifdef NOT_YET_IMPLEMENTED
+ size= sizeof(Host_entry);
+#else
+ size= 0;
+#endif
+ break;
+
/*
This case must be last,
for aggregation in total_memory.
*/
- case 137:
+ case 154:
name= "performance_schema.memory";
size= total_memory;
/* This will fail if something is not advertised here */
diff --git a/storage/perfschema/pfs_engine_table.h b/storage/perfschema/pfs_engine_table.h
index 40f5404d0b7..981d72ee19e 100644
--- a/storage/perfschema/pfs_engine_table.h
+++ b/storage/perfschema/pfs_engine_table.h
@@ -263,7 +263,7 @@ public:
~PFS_readonly_acl()
{}
- ACL_internal_access_result check(ulong want_access, ulong *save_priv) const;
+ virtual ACL_internal_access_result check(ulong want_access, ulong *save_priv) const;
};
/** Singleton instance of PFS_readonly_acl. */
diff --git a/storage/perfschema/pfs_events.h b/storage/perfschema/pfs_events.h
index c9586df11bd..97fb7e08d63 100644
--- a/storage/perfschema/pfs_events.h
+++ b/storage/perfschema/pfs_events.h
@@ -29,7 +29,7 @@ struct PFS_instr_class;
struct PFS_events
{
/** THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** EVENT_ID. */
ulonglong m_event_id;
/** END_EVENT_ID. */
diff --git a/storage/perfschema/pfs_events_waits.cc b/storage/perfschema/pfs_events_waits.cc
index 2ee9ec292a2..c8a9d20a2f1 100644
--- a/storage/perfschema/pfs_events_waits.cc
+++ b/storage/perfschema/pfs_events_waits.cc
@@ -230,16 +230,6 @@ void reset_events_waits_by_host()
}
}
-/** Reset table EVENTS_WAITS_GLOBAL_BY_EVENT_NAME data. */
-void reset_events_waits_global()
-{
- PFS_single_stat *stat= global_instr_class_waits_array;
- PFS_single_stat *stat_last= global_instr_class_waits_array + wait_class_max;
-
- for ( ; stat < stat_last; stat++)
- stat->reset();
-}
-
void reset_table_waits_by_table()
{
PFS_table_share *pfs= table_share_array;
diff --git a/storage/perfschema/pfs_global.cc b/storage/perfschema/pfs_global.cc
index 6c3b79a3e1f..0c022b85748 100644
--- a/storage/perfschema/pfs_global.cc
+++ b/storage/perfschema/pfs_global.cc
@@ -18,13 +18,16 @@
Miscellaneous global dependencies (implementation).
*/
-#include "my_global.h"
-#include "my_sys.h"
#include "pfs_global.h"
-#include "my_net.h"
+#include <my_sys.h>
+#include <my_net.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h> /* memalign() may be here */
+#endif
-#include <stdlib.h>
-#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#ifdef __WIN__
#include <winsock2.h>
@@ -45,18 +48,65 @@ void *pfs_malloc(size_t size, myf flags)
DBUG_ASSERT(! pfs_initialized);
DBUG_ASSERT(size > 0);
- void *ptr= malloc(size);
- if (likely(ptr != NULL))
- pfs_allocated_memory+= size;
- if (likely((ptr != NULL) && (flags & MY_ZEROFILL)))
+ void *ptr;
+
+#ifdef PFS_ALIGNEMENT
+#ifdef HAVE_POSIX_MEMALIGN
+ /* Linux */
+ if (unlikely(posix_memalign(& ptr, PFS_ALIGNEMENT, size)))
+ return NULL;
+#else
+#ifdef HAVE_MEMALIGN
+ /* Solaris */
+ ptr= memalign(PFS_ALIGNEMENT, size);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#else
+#ifdef HAVE_ALIGNED_MALLOC
+ /* Windows */
+ ptr= _aligned_malloc(size, PFS_ALIGNEMENT);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#else
+#error "Missing implementation for PFS_ALIGNENT"
+#endif /* HAVE_ALIGNED_MALLOC */
+#endif /* HAVE_MEMALIGN */
+#endif /* HAVE_POSIX_MEMALIGN */
+#else /* PFS_ALIGNMENT */
+ /* Everything else */
+ ptr= malloc(size);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#endif
+
+ pfs_allocated_memory+= size;
+ if (flags & MY_ZEROFILL)
memset(ptr, 0, size);
return ptr;
}
void pfs_free(void *ptr)
{
- if (ptr != NULL)
- free(ptr);
+ if (ptr == NULL)
+ return;
+
+#ifdef HAVE_POSIX_MEMALIGN
+ /* Allocated with posix_memalign() */
+ free(ptr);
+#else
+#ifdef HAVE_MEMALIGN
+ /* Allocated with memalign() */
+ free(ptr);
+#else
+#ifdef HAVE_ALIGNED_MALLOC
+ /* Allocated with _aligned_malloc() */
+ _aligned_free(ptr);
+#else
+ /* Allocated with malloc() */
+ free(ptr);
+#endif /* HAVE_ALIGNED_MALLOC */
+#endif /* HAVE_MEMALIGN */
+#endif /* HAVE_POSIX_MEMALIGN */
}
void pfs_print_error(const char *format, ...)
diff --git a/storage/perfschema/pfs_global.h b/storage/perfschema/pfs_global.h
index 693153cb097..cddf688ddf4 100644
--- a/storage/perfschema/pfs_global.h
+++ b/storage/perfschema/pfs_global.h
@@ -16,6 +16,9 @@
#ifndef PFS_GLOBAL_H
#define PFS_GLOBAL_H
+#include "my_global.h"
+#include "my_compiler.h"
+
/**
@file storage/perfschema/pfs_global.h
Miscellaneous global dependencies (declarations).
@@ -26,6 +29,18 @@ extern bool pfs_initialized;
/** Total memory allocated by the performance schema, in bytes. */
extern ulonglong pfs_allocated_memory;
+#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) || defined(HAVE_ALIGNED_MALLOC)
+#define PFS_ALIGNEMENT 64
+#define PFS_ALIGNED MY_ALIGNED(PFS_ALIGNEMENT)
+#else
+/*
+ Known platforms that do not provide aligned memory:
+ - MacOSX Darwin (osx10.5)
+ For these platforms, compile without the alignment optimization.
+*/
+#define PFS_ALIGNED
+#endif /* HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_MALLOC */
+
void *pfs_malloc(size_t size, myf flags);
/**
diff --git a/storage/perfschema/pfs_host.cc b/storage/perfschema/pfs_host.cc
index 82b78e19ce8..09763b0bd8b 100644
--- a/storage/perfschema/pfs_host.cc
+++ b/storage/perfschema/pfs_host.cc
@@ -42,7 +42,7 @@ static PFS_single_stat *host_instr_class_waits_array= NULL;
static PFS_stage_stat *host_instr_class_stages_array= NULL;
static PFS_statement_stat *host_instr_class_statements_array= NULL;
-static LF_HASH host_hash;
+LF_HASH host_hash;
static bool host_hash_inited= false;
/**
@@ -146,10 +146,11 @@ C_MODE_END
*/
int init_host_hash(void)
{
- if (! host_hash_inited)
+ if ((! host_hash_inited) && (host_max > 0))
{
lf_hash_init(&host_hash, sizeof(PFS_host*), LF_HASH_UNIQUE,
0, 0, host_hash_get_key, &my_charset_bin);
+ host_hash.size= host_max;
host_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_host.h b/storage/perfschema/pfs_host.h
index d04b88e62f3..eb0ff6efc6f 100644
--- a/storage/perfschema/pfs_host.h
+++ b/storage/perfschema/pfs_host.h
@@ -44,7 +44,7 @@ struct PFS_host_key
uint m_key_length;
};
-struct PFS_host : PFS_connection_slice
+struct PFS_ALIGNED PFS_host : PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -105,6 +105,8 @@ extern ulong host_lost;
extern PFS_host *host_array;
+extern LF_HASH host_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_instr.cc b/storage/perfschema/pfs_instr.cc
index 39caabaf030..25e78ee7b5e 100644
--- a/storage/perfschema/pfs_instr.cc
+++ b/storage/perfschema/pfs_instr.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -82,6 +82,10 @@ uint statement_stack_max;
ulong locker_lost= 0;
/** Number of statement lost. @sa STATEMENT_STACK_SIZE. */
ulong statement_lost= 0;
+/** Size of connection attribute storage per thread */
+ulong session_connect_attrs_size_per_thread;
+/** Number of connection attributes lost */
+ulong session_connect_attrs_lost= 0;
/**
Mutex instrumentation instances array.
@@ -140,11 +144,10 @@ PFS_table *table_array= NULL;
*/
PFS_socket *socket_array= NULL;
-PFS_single_stat *global_instr_class_waits_array= NULL;
PFS_stage_stat *global_instr_class_stages_array= NULL;
PFS_statement_stat *global_instr_class_statements_array= NULL;
-static volatile uint32 thread_internal_id_counter= 0;
+static volatile uint64 thread_internal_id_counter= 0;
static uint thread_instr_class_waits_sizing;
static uint thread_instr_class_stages_sizing;
@@ -157,9 +160,10 @@ static PFS_events_waits *thread_waits_history_array= NULL;
static PFS_events_stages *thread_stages_history_array= NULL;
static PFS_events_statements *thread_statements_history_array= NULL;
static PFS_events_statements *thread_statements_stack_array= NULL;
+static char *thread_session_connect_attrs_array= NULL;
/** Hash table for instrumented files. */
-static LF_HASH filename_hash;
+LF_HASH filename_hash;
/** True if filename_hash is initialized. */
static bool filename_hash_inited= false;
@@ -174,6 +178,7 @@ int init_instruments(const PFS_global_param *param)
uint thread_stages_history_sizing;
uint thread_statements_history_sizing;
uint thread_statements_stack_sizing;
+ uint thread_session_connect_attrs_sizing;
uint index;
DBUG_ENTER("init_instruments");
@@ -221,6 +226,11 @@ int init_instruments(const PFS_global_param *param)
thread_instr_class_statements_sizing= param->m_thread_sizing
* param->m_statement_class_sizing;
+ session_connect_attrs_size_per_thread= param->m_session_connect_attrs_sizing;
+ thread_session_connect_attrs_sizing= param->m_thread_sizing
+ * session_connect_attrs_size_per_thread;
+ session_connect_attrs_lost= 0;
+
mutex_array= NULL;
rwlock_array= NULL;
cond_array= NULL;
@@ -366,6 +376,14 @@ int init_instruments(const PFS_global_param *param)
thread_instr_class_statements_array[index].reset();
}
+ if (thread_session_connect_attrs_sizing > 0)
+ {
+ thread_session_connect_attrs_array=
+ (char *)pfs_malloc(thread_session_connect_attrs_sizing, MYF(MY_ZEROFILL));
+ if (unlikely(thread_session_connect_attrs_array == NULL))
+ return 1;
+ }
+
for (index= 0; index < thread_max; index++)
{
thread_array[index].m_waits_history=
@@ -382,18 +400,8 @@ int init_instruments(const PFS_global_param *param)
&thread_statements_stack_array[index * statement_stack_max];
thread_array[index].m_instr_class_statements_stats=
&thread_instr_class_statements_array[index * statement_class_max];
- }
-
- if (wait_class_max > 0)
- {
- global_instr_class_waits_array=
- PFS_MALLOC_ARRAY(wait_class_max,
- PFS_single_stat, MYF(MY_ZEROFILL));
- if (unlikely(global_instr_class_waits_array == NULL))
- DBUG_RETURN(1);
-
- for (index= 0; index < wait_class_max; index++)
- global_instr_class_waits_array[index].reset();
+ thread_array[index].m_session_connect_attrs=
+ &thread_session_connect_attrs_array[index * session_connect_attrs_size_per_thread];
}
if (stage_class_max > 0)
@@ -461,8 +469,6 @@ void cleanup_instruments(void)
thread_statements_stack_array= NULL;
pfs_free(thread_instr_class_waits_array);
thread_instr_class_waits_array= NULL;
- pfs_free(global_instr_class_waits_array);
- global_instr_class_waits_array= NULL;
pfs_free(global_instr_class_stages_array);
global_instr_class_stages_array= NULL;
pfs_free(global_instr_class_statements_array);
@@ -471,6 +477,9 @@ void cleanup_instruments(void)
thread_instr_class_statements_array= NULL;
pfs_free(thread_instr_class_stages_array);
thread_instr_class_stages_array= NULL;
+ pfs_free(thread_session_connect_attrs_array);
+ thread_session_connect_attrs_array=NULL;
+
DBUG_VOID_RETURN;
}
@@ -502,10 +511,11 @@ int init_file_hash(void)
{
DBUG_ENTER("init_file_hash");
- if (! filename_hash_inited)
+ if ((! filename_hash_inited) && (file_max > 0))
{
lf_hash_init(&filename_hash, sizeof(PFS_file*), LF_HASH_UNIQUE,
0, 0, filename_hash_get_key, &my_charset_bin);
+ filename_hash.size= file_max;
filename_hash_inited= true;
}
DBUG_RETURN(0);
@@ -604,7 +614,7 @@ void PFS_scan::init(uint random, uint max_size)
*/
PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
{
- static uint mutex_monotonic_index= 0;
+ static uint PFS_ALIGNED mutex_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_mutex *pfs;
@@ -642,8 +652,7 @@ PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
pfs->m_class= klass;
pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
pfs->m_timed= klass->m_timed;
- pfs->m_wait_stat.reset();
- pfs->m_lock_stat.reset();
+ pfs->m_mutex_stat.reset();
pfs->m_owner= NULL;
pfs->m_last_locked= 0;
pfs->m_lock.dirty_to_allocated();
@@ -667,10 +676,9 @@ void destroy_mutex(PFS_mutex *pfs)
DBUG_ENTER("destroy_mutex");
DBUG_ASSERT(pfs != NULL);
PFS_mutex_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_mutex_stat.aggregate(& pfs->m_mutex_stat);
+ pfs->m_mutex_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
pfs->m_lock.allocated_to_free();
@@ -685,7 +693,7 @@ void destroy_mutex(PFS_mutex *pfs)
*/
PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
{
- static uint rwlock_monotonic_index= 0;
+ static uint PFS_ALIGNED rwlock_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_rwlock *pfs;
@@ -705,10 +713,8 @@ PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
pfs->m_class= klass;
pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
pfs->m_timed= klass->m_timed;
- pfs->m_wait_stat.reset();
+ pfs->m_rwlock_stat.reset();
pfs->m_lock.dirty_to_allocated();
- pfs->m_read_lock_stat.reset();
- pfs->m_write_lock_stat.reset();
pfs->m_writer= NULL;
pfs->m_readers= 0;
pfs->m_last_written= 0;
@@ -733,10 +739,9 @@ void destroy_rwlock(PFS_rwlock *pfs)
DBUG_ENTER("destroy_rwlock");
DBUG_ASSERT(pfs != NULL);
PFS_rwlock_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_rwlock_stat.aggregate(& pfs->m_rwlock_stat);
+ pfs->m_rwlock_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
pfs->m_lock.allocated_to_free();
@@ -751,7 +756,7 @@ void destroy_rwlock(PFS_rwlock *pfs)
*/
PFS_cond* create_cond(PFS_cond_class *klass, const void *identity)
{
- static uint cond_monotonic_index= 0;
+ static uint PFS_ALIGNED cond_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_cond *pfs;
@@ -796,9 +801,8 @@ void destroy_cond(PFS_cond *pfs)
DBUG_ASSERT(pfs != NULL);
PFS_cond_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_cond_stat.aggregate(& pfs->m_cond_stat);
pfs->m_wait_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
@@ -812,19 +816,32 @@ PFS_thread* PFS_thread::get_current_thread()
return pfs;
}
+void PFS_thread::reset_session_connect_attrs()
+{
+ m_session_connect_attrs_length= 0;
+ m_session_connect_attrs_cs= NULL;
+
+ if ((m_session_connect_attrs != NULL) &&
+ (session_connect_attrs_size_per_thread > 0) )
+ {
+ /* Do not keep user data */
+ memset(m_session_connect_attrs, 0, session_connect_attrs_size_per_thread);
+ }
+}
+
/**
Create instrumentation for a thread instance.
@param klass the thread class
@param identity the thread address,
or a value characteristic of this thread
- @param thread_id the PROCESSLIST thread id,
+ @param processlist_id the PROCESSLIST id,
or 0 if unknown
@return a thread instance, or NULL
*/
PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
- ulong thread_id)
+ ulonglong processlist_id)
{
- static uint thread_monotonic_index= 0;
+ static uint PFS_ALIGNED thread_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_thread *pfs;
@@ -841,9 +858,9 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
if (pfs->m_lock.free_to_dirty())
{
pfs->m_thread_internal_id=
- PFS_atomic::add_u32(&thread_internal_id_counter, 1);
+ PFS_atomic::add_u64(&thread_internal_id_counter, 1);
pfs->m_parent_thread_internal_id= 0;
- pfs->m_thread_id= thread_id;
+ pfs->m_processlist_id= processlist_id;
pfs->m_event_id= 1;
pfs->m_enabled= true;
pfs->m_class= klass;
@@ -856,6 +873,7 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
pfs->m_statements_history_index= 0;
pfs->reset_stats();
+ pfs->reset_session_connect_attrs();
pfs->m_filename_hash_pins= NULL;
pfs->m_table_share_hash_pins= NULL;
@@ -871,8 +889,11 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
pfs->m_dbname_length= 0;
pfs->m_command= 0;
pfs->m_start_time= 0;
+ pfs->m_processlist_state_ptr= NULL;
pfs->m_processlist_state_length= 0;
+ pfs->m_processlist_info_ptr= NULL;
pfs->m_processlist_info_length= 0;
+ pfs->m_processlist_lock.set_allocated();
pfs->m_host= NULL;
pfs->m_user= NULL;
@@ -999,6 +1020,7 @@ PFS_socket *sanitize_socket(PFS_socket *unsafe)
void destroy_thread(PFS_thread *pfs)
{
DBUG_ASSERT(pfs != NULL);
+ pfs->reset_session_connect_attrs();
if (pfs->m_account != NULL)
{
pfs->m_account->release();
@@ -1084,11 +1106,12 @@ LF_PINS* get_filename_hash_pins(PFS_thread *thread)
@param klass the file class
@param filename the file name
@param len the length in bytes of filename
+ @param create create a file instance if none found
@return a file instance, or NULL
*/
PFS_file*
find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
- const char *filename, uint len)
+ const char *filename, uint len, bool create)
{
PFS_file *pfs;
LF_PINS *pins;
@@ -1096,6 +1119,8 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
const char *safe_filename;
DBUG_ENTER("find_or_create_file");
+ DBUG_ASSERT(klass != NULL || ! create);
+
pins= get_filename_hash_pins(thread);
if (unlikely(pins == NULL))
{
@@ -1171,7 +1196,7 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
/* Append the unresolved file name to the resolved path */
char *ptr= buffer + strlen(buffer);
char *buf_end= &buffer[sizeof(buffer)-1];
- if (buf_end > ptr)
+ if ((buf_end > ptr) && (*(ptr-1) != FN_LIBCHAR))
*ptr++= FN_LIBCHAR;
if (buf_end > ptr)
strncpy(ptr, safe_filename + dirlen, buf_end - ptr);
@@ -1183,7 +1208,7 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
PFS_file **entry;
uint retry_count= 0;
const uint retry_max= 3;
- static uint file_monotonic_index= 0;
+ static uint PFS_ALIGNED file_monotonic_index= 0;
uint index;
uint attempts= 0;
@@ -1202,6 +1227,12 @@ search:
lf_hash_search_unpin(pins);
+ if (! create)
+ {
+ /* No lost counter, just looking for the file existence. */
+ return NULL;
+ }
+
while (++attempts <= file_max)
{
/* See create_mutex() */
@@ -1218,7 +1249,6 @@ search:
strncpy(pfs->m_filename, normalized_filename, normalized_length);
pfs->m_filename[normalized_length]= '\0';
pfs->m_filename_length= normalized_length;
- pfs->m_wait_stat.reset();
pfs->m_file_stat.m_open_count= 1;
pfs->m_file_stat.m_io_stat.reset();
pfs->m_identity= (const void *)pfs;
@@ -1285,14 +1315,9 @@ void destroy_file(PFS_thread *thread, PFS_file *pfs)
DBUG_ASSERT(pfs != NULL);
PFS_file_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
-
/* Aggregate to FILE_SUMMARY_BY_EVENT_NAME */
- klass->m_file_stat.m_io_stat.aggregate(& pfs->m_file_stat.m_io_stat);
- pfs->m_file_stat.m_io_stat.reset();
+ klass->m_file_stat.aggregate(& pfs->m_file_stat);
+ pfs->m_file_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
@@ -1318,7 +1343,7 @@ void destroy_file(PFS_thread *thread, PFS_file *pfs)
PFS_table* create_table(PFS_table_share *share, PFS_thread *opening_thread,
const void *identity)
{
- static uint table_monotonic_index= 0;
+ static uint PFS_ALIGNED table_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_table *pfs;
@@ -1364,23 +1389,33 @@ void PFS_table::sanitized_aggregate(void)
and not own the table handle.
*/
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if ((safe_share != NULL && safe_thread != NULL) &&
- (m_has_io_stats || m_has_lock_stats))
+ if (safe_share != NULL)
{
- safe_aggregate(& m_table_stat, safe_share, safe_thread);
- m_has_io_stats= false;
- m_has_lock_stats= false;
+ if (m_has_io_stats && m_has_lock_stats)
+ {
+ safe_aggregate(& m_table_stat, safe_share);
+ m_has_io_stats= false;
+ m_has_lock_stats= false;
+ }
+ else if (m_has_io_stats)
+ {
+ safe_aggregate_io(& m_table_stat, safe_share);
+ m_has_io_stats= false;
+ }
+ else if (m_has_lock_stats)
+ {
+ safe_aggregate_lock(& m_table_stat, safe_share);
+ m_has_lock_stats= false;
+ }
}
}
void PFS_table::sanitized_aggregate_io(void)
{
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if (safe_share != NULL && safe_thread != NULL && m_has_io_stats)
+ if (safe_share != NULL && m_has_io_stats)
{
- safe_aggregate_io(& m_table_stat, safe_share, safe_thread);
+ safe_aggregate_io(& m_table_stat, safe_share);
m_has_io_stats= false;
}
}
@@ -1388,96 +1423,44 @@ void PFS_table::sanitized_aggregate_io(void)
void PFS_table::sanitized_aggregate_lock(void)
{
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if (safe_share != NULL && safe_thread != NULL && m_has_lock_stats)
+ if (safe_share != NULL && m_has_lock_stats)
{
- safe_aggregate_lock(& m_table_stat, safe_share, safe_thread);
+ safe_aggregate_lock(& m_table_stat, safe_share);
m_has_lock_stats= false;
}
}
void PFS_table::safe_aggregate(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/io/table/sql/handler)
- */
- index= global_table_io_class.m_event_name_index;
- table_stat->sum_io(& event_name_array[index]);
-
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/lock/table/sql/handler)
- */
- index= global_table_lock_class.m_event_name_index;
- table_stat->sum_lock(& event_name_array[index]);
- }
+ uint key_count= sanitize_index_count(table_share->m_key_count);
/* Aggregate to TABLE_IO_SUMMARY, TABLE_LOCK_SUMMARY */
- table_share->m_table_stat.aggregate(table_stat);
+ table_share->m_table_stat.aggregate(table_stat, key_count);
table_stat->fast_reset();
}
void PFS_table::safe_aggregate_io(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/io/table/sql/handler)
- */
- index= global_table_io_class.m_event_name_index;
- table_stat->sum_io(& event_name_array[index]);
- }
+ uint key_count= sanitize_index_count(table_share->m_key_count);
/* Aggregate to TABLE_IO_SUMMARY */
- table_share->m_table_stat.aggregate_io(table_stat);
+ table_share->m_table_stat.aggregate_io(table_stat, key_count);
table_stat->fast_reset_io();
}
void PFS_table::safe_aggregate_lock(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
-
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/lock/table/sql/handler)
- */
- index= global_table_lock_class.m_event_name_index;
- table_stat->sum_lock(& event_name_array[index]);
- }
/* Aggregate to TABLE_LOCK_SUMMARY */
table_share->m_table_stat.aggregate_lock(table_stat);
@@ -1504,47 +1487,59 @@ void destroy_table(PFS_table *pfs)
@param identity the socket descriptor
@return a socket instance, or NULL
*/
-PFS_socket* create_socket(PFS_socket_class *klass, const void *identity)
+PFS_socket* create_socket(PFS_socket_class *klass, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len)
{
- PFS_scan scan;
+ static uint PFS_ALIGNED socket_monotonic_index= 0;
+ uint index;
+ uint attempts= 0;
+ PFS_socket *pfs;
DBUG_ENTER("create_socket");
- /**
- Unlike other instrumented objects, there is no socket 'object' to use as a
- unique identifier. Instead, a pointer to the PFS_socket object will be used
- to identify this socket instance. The socket descriptor will be used to
- seed the the random index assignment.
- */
- my_socket fd= likely(identity != NULL) ?
- *(reinterpret_cast<const my_socket*>(identity)) : 0;
- my_ptrdiff_t ptr= fd;
- uint random= randomized_index((const void *)ptr, socket_max);
-
- for (scan.init(random, socket_max);
- scan.has_pass();
- scan.next_pass())
- {
- PFS_socket *pfs= socket_array + scan.first();
- PFS_socket *pfs_last= socket_array + scan.last();
- for ( ; pfs < pfs_last; pfs++)
+ uint fd_used= 0;
+ uint addr_len_used= addr_len;
+
+ if (fd != NULL)
+ fd_used= *fd;
+
+ if (addr_len_used > sizeof(sockaddr_storage))
+ addr_len_used= sizeof(sockaddr_storage);
+
+ while (++attempts <= socket_max)
+ {
+ index= PFS_atomic::add_u32(& socket_monotonic_index, 1) % socket_max;
+ pfs= socket_array + index;
+
+ if (pfs->m_lock.is_free())
{
- if (pfs->m_lock.is_free())
+ if (pfs->m_lock.free_to_dirty())
{
- if (pfs->m_lock.free_to_dirty())
+ pfs->m_fd= fd_used;
+ /* There is no socket object, so we use the instrumentation. */
+ pfs->m_identity= pfs;
+ pfs->m_class= klass;
+ pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
+ pfs->m_timed= klass->m_timed;
+ pfs->m_idle= false;
+ pfs->m_socket_stat.reset();
+ pfs->m_thread_owner= NULL;
+
+ pfs->m_addr_len= addr_len_used;
+ if ((addr != NULL) && (addr_len_used > 0))
{
- pfs->m_fd= fd;
- pfs->m_identity= pfs;
- pfs->m_class= klass;
- pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
- pfs->m_timed= klass->m_timed;
- pfs->m_idle= false;
- pfs->m_socket_stat.reset();
- pfs->m_lock.dirty_to_allocated();
- pfs->m_thread_owner= NULL;
- if (klass->is_singleton())
- klass->m_singleton= pfs;
- DBUG_RETURN(pfs);
+ pfs->m_addr_len= addr_len_used;
+ memcpy(&pfs->m_sock_addr, addr, addr_len_used);
}
+ else
+ {
+ pfs->m_addr_len= 0;
+ }
+
+ pfs->m_lock.dirty_to_allocated();
+
+ if (klass->is_singleton())
+ klass->m_singleton= pfs;
+ DBUG_RETURN(pfs);
}
}
}
@@ -1598,7 +1593,7 @@ static void reset_mutex_waits_by_instance(void)
DBUG_ENTER("reset_mutex_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_mutex_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1609,7 +1604,7 @@ static void reset_rwlock_waits_by_instance(void)
DBUG_ENTER("reset_rwlock_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_rwlock_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1620,7 +1615,7 @@ static void reset_cond_waits_by_instance(void)
DBUG_ENTER("reset_cond_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_cond_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1678,15 +1673,6 @@ void reset_socket_instance_io(void)
DBUG_VOID_RETURN;
}
-void reset_global_wait_stat()
-{
- PFS_single_stat *stat= global_instr_class_waits_array;
- PFS_single_stat *stat_last= global_instr_class_waits_array + wait_class_max;
-
- for ( ; stat < stat_last; stat++)
- stat->reset();
-}
-
void aggregate_all_event_names(PFS_single_stat *from_array,
PFS_single_stat *to_array)
{
diff --git a/storage/perfschema/pfs_instr.h b/storage/perfschema/pfs_instr.h
index b579c1d7902..2ea44830d2b 100644
--- a/storage/perfschema/pfs_instr.h
+++ b/storage/perfschema/pfs_instr.h
@@ -34,6 +34,8 @@ struct PFS_socket_class;
#else
#include <arpa/inet.h>
#endif
+#include "my_global.h"
+#include "my_compiler.h"
#include "pfs_lock.h"
#include "pfs_stat.h"
#include "pfs_instr_class.h"
@@ -63,24 +65,17 @@ struct PFS_instr
bool m_enabled;
/** Timed flag. */
bool m_timed;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
};
/** Instrumented mutex implementation. @see PSI_mutex. */
-struct PFS_mutex : public PFS_instr
+struct PFS_ALIGNED PFS_mutex : public PFS_instr
{
/** Mutex identity, typically a pthread_mutex_t. */
const void *m_identity;
/** Mutex class. */
PFS_mutex_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
- /**
- Mutex lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_lock_stat;
+ /** Instrument statistics. */
+ PFS_mutex_stat m_mutex_stat;
/** Current owner. */
PFS_thread *m_owner;
/**
@@ -91,24 +86,14 @@ struct PFS_mutex : public PFS_instr
};
/** Instrumented rwlock implementation. @see PSI_rwlock. */
-struct PFS_rwlock : public PFS_instr
+struct PFS_ALIGNED PFS_rwlock : public PFS_instr
{
/** RWLock identity, typically a pthread_rwlock_t. */
const void *m_identity;
/** RWLock class. */
PFS_rwlock_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
- /**
- RWLock read lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_read_lock_stat;
- /**
- RWLock write lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_write_lock_stat;
+ /** Instrument statistics. */
+ PFS_rwlock_stat m_rwlock_stat;
/** Current writer thread. */
PFS_thread *m_writer;
/** Current count of readers. */
@@ -126,7 +111,7 @@ struct PFS_rwlock : public PFS_instr
};
/** Instrumented cond implementation. @see PSI_cond. */
-struct PFS_cond : public PFS_instr
+struct PFS_ALIGNED PFS_cond : public PFS_instr
{
/** Condition identity, typically a pthread_cond_t. */
const void *m_identity;
@@ -139,7 +124,7 @@ struct PFS_cond : public PFS_instr
};
/** Instrumented File and FILE implementation. @see PSI_file. */
-struct PFS_file : public PFS_instr
+struct PFS_ALIGNED PFS_file : public PFS_instr
{
uint32 get_version()
{ return m_lock.get_version(); }
@@ -152,14 +137,12 @@ struct PFS_file : public PFS_instr
uint m_filename_length;
/** File class. */
PFS_file_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
/** File usage statistics. */
PFS_file_stat m_file_stat;
};
/** Instrumented table implementation. @see PSI_table. */
-struct PFS_table
+struct PFS_ALIGNED PFS_table
{
/**
True if table io instrumentation is enabled.
@@ -196,12 +179,22 @@ public:
*/
void aggregate(void)
{
- if (likely((m_thread_owner != NULL) && (m_has_io_stats || m_has_lock_stats)))
+ if (m_has_io_stats && m_has_lock_stats)
{
- safe_aggregate(& m_table_stat, m_share, m_thread_owner);
+ safe_aggregate(& m_table_stat, m_share);
m_has_io_stats= false;
m_has_lock_stats= false;
}
+ else if (m_has_io_stats)
+ {
+ safe_aggregate_io(& m_table_stat, m_share);
+ m_has_io_stats= false;
+ }
+ else if (m_has_lock_stats)
+ {
+ safe_aggregate_lock(& m_table_stat, m_share);
+ m_has_lock_stats= false;
+ }
}
/**
@@ -238,18 +231,15 @@ public:
private:
static void safe_aggregate(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
static void safe_aggregate_io(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
static void safe_aggregate_lock(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
};
/** Instrumented socket implementation. @see PSI_socket. */
-struct PFS_socket : public PFS_instr
+struct PFS_ALIGNED PFS_socket : public PFS_instr
{
uint32 get_version()
{ return m_lock.get_version(); }
@@ -371,7 +361,7 @@ private:
/** Instrumented thread implementation. @see PSI_thread. */
-struct PFS_thread : PFS_connection_slice
+struct PFS_ALIGNED PFS_thread : PFS_connection_slice
{
static PFS_thread* get_current_thread(void);
@@ -400,11 +390,11 @@ struct PFS_thread : PFS_connection_slice
/** Pins for digest_hash. */
LF_PINS *m_digest_hash_pins;
/** Internal thread identifier, unique. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Parent internal thread identifier. */
- ulong m_parent_thread_internal_id;
+ ulonglong m_parent_thread_internal_id;
/** External (SHOW PROCESSLIST) thread identifier, not unique. */
- ulong m_thread_id;
+ ulong m_processlist_id;
/** Thread class. */
PFS_thread_class *m_class;
/**
@@ -486,6 +476,8 @@ struct PFS_thread : PFS_connection_slice
int m_command;
/** Start time. */
time_t m_start_time;
+ /** Lock for Processlist state, Processlist info. */
+ pfs_lock m_processlist_lock;
/** Processlist state. */
const char *m_processlist_state_ptr;
/** Length of @c m_processlist_state_ptr. */
@@ -504,9 +496,18 @@ struct PFS_thread : PFS_connection_slice
PFS_host *m_host;
PFS_user *m_user;
PFS_account *m_account;
+
+ /** Reset session connect attributes */
+ void reset_session_connect_attrs();
+
+ /** a buffer for the connection attributes */
+ char *m_session_connect_attrs;
+ /** length used by @c m_connect_attrs */
+ uint m_session_connect_attrs_length;
+ /** character set in which @c m_connect_attrs are encoded */
+ const CHARSET_INFO *m_session_connect_attrs_cs;
};
-extern PFS_single_stat *global_instr_class_waits_array;
extern PFS_stage_stat *global_instr_class_stages_array;
extern PFS_statement_stat *global_instr_class_statements_array;
@@ -529,12 +530,12 @@ PFS_cond* create_cond(PFS_cond_class *klass, const void *identity);
void destroy_cond(PFS_cond *pfs);
PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
- ulong thread_id);
+ ulonglong processlist_id);
void destroy_thread(PFS_thread *pfs);
PFS_file* find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
- const char *filename, uint len);
+ const char *filename, uint len, bool create);
void release_file(PFS_file *pfs);
void destroy_file(PFS_thread *thread, PFS_file *pfs);
@@ -542,7 +543,10 @@ PFS_table* create_table(PFS_table_share *share, PFS_thread *opening_thread,
const void *identity);
void destroy_table(PFS_table *pfs);
-PFS_socket* create_socket(PFS_socket_class *socket_class, const void *identity);
+PFS_socket* create_socket(PFS_socket_class *socket_class,
+ const my_socket *fd,
+ const struct sockaddr *addr,
+ socklen_t addr_len);
void destroy_socket(PFS_socket *pfs);
/* For iterators and show status. */
@@ -568,6 +572,8 @@ extern ulong events_stages_history_per_thread;
extern ulong events_statements_history_per_thread;
extern ulong locker_lost;
extern ulong statement_lost;
+extern ulong session_connect_attrs_lost;
+extern ulong session_connect_attrs_size_per_thread;
/* Exposing the data directly, for iterators. */
@@ -624,6 +630,8 @@ void update_socket_derived_flags();
/** Update derived flags for all instruments. */
void update_instruments_derived_flags();
+extern LF_HASH filename_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_instr_class.cc b/storage/perfschema/pfs_instr_class.cc
index 0a4b47404a4..05c85104a94 100644
--- a/storage/perfschema/pfs_instr_class.cc
+++ b/storage/perfschema/pfs_instr_class.cc
@@ -135,9 +135,12 @@ static PFS_thread_class *thread_class_array= NULL;
*/
PFS_table_share *table_share_array= NULL;
-PFS_instr_class global_table_io_class;
-PFS_instr_class global_table_lock_class;
-PFS_instr_class global_idle_class;
+PFS_ALIGNED PFS_single_stat global_idle_stat;
+PFS_ALIGNED PFS_table_io_stat global_table_io_stat;
+PFS_ALIGNED PFS_table_lock_stat global_table_lock_stat;
+PFS_ALIGNED PFS_instr_class global_table_io_class;
+PFS_ALIGNED PFS_instr_class global_table_lock_class;
+PFS_ALIGNED PFS_instr_class global_idle_class;
/** Class-timer map */
enum_timer_name *class_timers[] =
@@ -165,7 +168,7 @@ enum_timer_name *class_timers[] =
@sa table_share_hash_get_key
@sa get_table_share_hash_pins
*/
-static LF_HASH table_share_hash;
+LF_HASH table_share_hash;
/** True if table_share_hash is initialized. */
static bool table_share_hash_inited= false;
@@ -193,19 +196,17 @@ uint mutex_class_start= 0;
uint rwlock_class_start= 0;
uint cond_class_start= 0;
uint file_class_start= 0;
-uint table_class_start= 0;
uint wait_class_max= 0;
uint socket_class_start= 0;
void init_event_name_sizing(const PFS_global_param *param)
{
- mutex_class_start= 0;
+ mutex_class_start= 3; /* global table io, table lock, idle */
rwlock_class_start= mutex_class_start + param->m_mutex_class_sizing;
cond_class_start= rwlock_class_start + param->m_rwlock_class_sizing;
file_class_start= cond_class_start + param->m_cond_class_sizing;
socket_class_start= file_class_start + param->m_file_class_sizing;
- table_class_start= socket_class_start + param->m_socket_class_sizing;
- wait_class_max= table_class_start + 3; /* global table io, lock, idle */
+ wait_class_max= socket_class_start + param->m_socket_class_sizing;
}
void register_global_classes()
@@ -213,19 +214,19 @@ void register_global_classes()
/* Table IO class */
init_instr_class(&global_table_io_class, "wait/io/table/sql/handler", 25,
0, PFS_CLASS_TABLE_IO);
- global_table_io_class.m_event_name_index= table_class_start;
+ global_table_io_class.m_event_name_index= GLOBAL_TABLE_IO_EVENT_INDEX;
configure_instr_class(&global_table_io_class);
/* Table lock class */
init_instr_class(&global_table_lock_class, "wait/lock/table/sql/handler", 27,
0, PFS_CLASS_TABLE_LOCK);
- global_table_lock_class.m_event_name_index= table_class_start + 1;
+ global_table_lock_class.m_event_name_index= GLOBAL_TABLE_LOCK_EVENT_INDEX;
configure_instr_class(&global_table_lock_class);
/* Idle class */
init_instr_class(&global_idle_class, "idle", 4,
0, PFS_CLASS_IDLE);
- global_idle_class.m_event_name_index= table_class_start + 2;
+ global_idle_class.m_event_name_index= GLOBAL_IDLE_EVENT_INDEX;
configure_instr_class(&global_idle_class);
}
@@ -384,6 +385,7 @@ int init_table_share_hash(void)
{
lf_hash_init(&table_share_hash, sizeof(PFS_table_share*), LF_HASH_UNIQUE,
0, 0, table_share_hash_get_key, &my_charset_bin);
+ table_share_hash.size= table_share_max;
table_share_hash_inited= true;
}
return 0;
@@ -715,7 +717,7 @@ PFS_sync_key register_mutex_class(const char *name, uint name_length,
*/
entry= &mutex_class_array[index];
init_instr_class(entry, name, name_length, flags, PFS_CLASS_MUTEX);
- entry->m_lock_stat.reset();
+ entry->m_mutex_stat.reset();
entry->m_event_name_index= mutex_class_start + index;
entry->m_singleton= NULL;
entry->m_enabled= false; /* disabled by default */
@@ -781,8 +783,7 @@ PFS_sync_key register_rwlock_class(const char *name, uint name_length,
{
entry= &rwlock_class_array[index];
init_instr_class(entry, name, name_length, flags, PFS_CLASS_RWLOCK);
- entry->m_read_lock_stat.reset();
- entry->m_write_lock_stat.reset();
+ entry->m_rwlock_stat.reset();
entry->m_event_name_index= rwlock_class_start + index;
entry->m_singleton= NULL;
entry->m_enabled= false; /* disabled by default */
@@ -1193,7 +1194,7 @@ static void set_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
pfs_key->m_name_length= len;
}
- pfs_key_last= pfs->m_keys + MAX_KEY;
+ pfs_key_last= pfs->m_keys + MAX_INDEXES;
for ( ; pfs_key < pfs_key_last; pfs_key++)
pfs_key->m_name_length= 0;
}
@@ -1256,7 +1257,7 @@ PFS_table_share* find_or_create_table_share(PFS_thread *thread,
const uint retry_max= 3;
bool enabled= true;
bool timed= true;
- static uint table_share_monotonic_index= 0;
+ static uint PFS_ALIGNED table_share_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_table_share *pfs;
@@ -1299,8 +1300,7 @@ search:
while (++attempts <= table_share_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& table_share_monotonic_index, 1);
- index= table_share_monotonic_index % table_share_max;
+ index= PFS_atomic::add_u32(& table_share_monotonic_index, 1) % table_share_max;
pfs= table_share_array + index;
if (pfs->m_lock.is_free())
@@ -1353,17 +1353,28 @@ search:
void PFS_table_share::aggregate_io(void)
{
- uint index= global_table_io_class.m_event_name_index;
- PFS_single_stat *table_io_total= & global_instr_class_waits_array[index];
- m_table_stat.sum_io(table_io_total);
+ uint safe_key_count= sanitize_index_count(m_key_count);
+ PFS_table_io_stat *from_stat;
+ PFS_table_io_stat *from_stat_last;
+ PFS_table_io_stat sum_io;
+
+ /* Aggregate stats for each index, if any */
+ from_stat= & m_table_stat.m_index_stat[0];
+ from_stat_last= from_stat + safe_key_count;
+ for ( ; from_stat < from_stat_last ; from_stat++)
+ sum_io.aggregate(from_stat);
+
+ /* Aggregate stats for the table */
+ sum_io.aggregate(& m_table_stat.m_index_stat[MAX_INDEXES]);
+
+ /* Add this table stats to the global sink. */
+ global_table_io_stat.aggregate(& sum_io);
m_table_stat.fast_reset_io();
}
void PFS_table_share::aggregate_lock(void)
{
- uint index= global_table_lock_class.m_event_name_index;
- PFS_single_stat *table_lock_total= & global_instr_class_waits_array[index];
- m_table_stat.sum_lock(table_lock_total);
+ global_table_lock_stat.aggregate(& m_table_stat.m_lock_stat);
m_table_stat.fast_reset_lock();
}
@@ -1418,6 +1429,16 @@ PFS_table_share *sanitize_table_share(PFS_table_share *unsafe)
SANITIZE_ARRAY_BODY(PFS_table_share, table_share_array, table_share_max, unsafe);
}
+/** Reset the wait statistics per instrument class. */
+void reset_events_waits_by_class()
+{
+ reset_file_class_io();
+ reset_socket_class_io();
+ global_idle_stat.reset();
+ global_table_io_stat.reset();
+ global_table_lock_stat.reset();
+}
+
/** Reset the io statistics per file class. */
void reset_file_class_io(void)
{
diff --git a/storage/perfschema/pfs_instr_class.h b/storage/perfschema/pfs_instr_class.h
index bef25e76467..d0b90734b66 100644
--- a/storage/perfschema/pfs_instr_class.h
+++ b/storage/perfschema/pfs_instr_class.h
@@ -16,7 +16,10 @@
#ifndef PFS_INSTR_CLASS_H
#define PFS_INSTR_CLASS_H
+#include "my_global.h"
#include "mysql_com.h" /* NAME_LEN */
+#include "lf.h"
+#include "pfs_global.h"
/**
@file storage/perfschema/pfs_instr_class.h
@@ -112,7 +115,6 @@ extern uint mutex_class_start;
extern uint rwlock_class_start;
extern uint cond_class_start;
extern uint file_class_start;
-extern uint table_class_start;
extern uint socket_class_start;
extern uint wait_class_max;
@@ -166,13 +168,10 @@ struct PFS_instr_class
struct PFS_mutex;
/** Instrumentation metadata for a MUTEX. */
-struct PFS_mutex_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_mutex_class : public PFS_instr_class
{
- /**
- Lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_lock_stat;
+ /** Mutex usage statistics. */
+ PFS_mutex_stat m_mutex_stat;
/** Singleton instance. */
PFS_mutex *m_singleton;
};
@@ -180,18 +179,10 @@ struct PFS_mutex_class : public PFS_instr_class
struct PFS_rwlock;
/** Instrumentation metadata for a RWLOCK. */
-struct PFS_rwlock_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_rwlock_class : public PFS_instr_class
{
- /**
- Read lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_read_lock_stat;
- /**
- Write lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_write_lock_stat;
+ /** Rwlock usage statistics. */
+ PFS_rwlock_stat m_rwlock_stat;
/** Singleton instance. */
PFS_rwlock *m_singleton;
};
@@ -199,7 +190,7 @@ struct PFS_rwlock_class : public PFS_instr_class
struct PFS_cond;
/** Instrumentation metadata for a COND. */
-struct PFS_cond_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_cond_class : public PFS_instr_class
{
/**
Condition usage statistics.
@@ -211,7 +202,7 @@ struct PFS_cond_class : public PFS_instr_class
};
/** Instrumentation metadata of a thread. */
-struct PFS_thread_class
+struct PFS_ALIGNED PFS_thread_class
{
/** True if this thread instrument is enabled. */
bool m_enabled;
@@ -249,7 +240,7 @@ struct PFS_table_key
};
/** Instrumentation metadata for a table share. */
-struct PFS_table_share
+struct PFS_ALIGNED PFS_table_share
{
public:
uint32 get_version()
@@ -318,13 +309,31 @@ public:
/** Table statistics. */
PFS_table_stat m_table_stat;
/** Index names. */
- PFS_table_key m_keys[MAX_KEY];
+ PFS_table_key m_keys[MAX_INDEXES];
private:
/** Number of opened table handles. */
int m_refcount;
};
+/** Statistics for the IDLE instrument. */
+extern PFS_single_stat global_idle_stat;
+/** Statistics for dropped table io. */
+extern PFS_table_io_stat global_table_io_stat;
+/** Statistics for dropped table lock. */
+extern PFS_table_lock_stat global_table_lock_stat;
+
+inline uint sanitize_index_count(uint count)
+{
+ if (likely(count <= MAX_INDEXES))
+ return count;
+ return 0;
+}
+
+#define GLOBAL_TABLE_IO_EVENT_INDEX 0
+#define GLOBAL_TABLE_LOCK_EVENT_INDEX 1
+#define GLOBAL_IDLE_EVENT_INDEX 2
+
/**
Instrument controlling all table io.
This instrument is used with table SETUP_OBJECTS.
@@ -345,7 +354,7 @@ extern PFS_instr_class global_idle_class;
struct PFS_file;
/** Instrumentation metadata for a file. */
-struct PFS_file_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_file_class : public PFS_instr_class
{
/** File usage statistics. */
PFS_file_stat m_file_stat;
@@ -354,21 +363,21 @@ struct PFS_file_class : public PFS_instr_class
};
/** Instrumentation metadata for a stage. */
-struct PFS_stage_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_stage_class : public PFS_instr_class
{
/** Stage usage statistics. */
PFS_stage_stat m_stage_stat;
};
/** Instrumentation metadata for a statement. */
-struct PFS_statement_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_statement_class : public PFS_instr_class
{
};
struct PFS_socket;
/** Instrumentation metadata for a socket. */
-struct PFS_socket_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_socket_class : public PFS_instr_class
{
/** Socket usage statistics. */
PFS_socket_stat m_socket_stat;
@@ -483,12 +492,15 @@ extern PFS_cond_class *cond_class_array;
extern PFS_file_class *file_class_array;
extern PFS_table_share *table_share_array;
+void reset_events_waits_by_class();
void reset_file_class_io();
void reset_socket_class_io();
/** Update derived flags for all table shares. */
void update_table_share_derived_flags(PFS_thread *thread);
+extern LF_HASH table_share_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_lock.h b/storage/perfschema/pfs_lock.h
index 65937e94ece..09efecd1c5f 100644
--- a/storage/perfschema/pfs_lock.h
+++ b/storage/perfschema/pfs_lock.h
@@ -33,7 +33,7 @@
Values of a free record should not be read by a reader.
Writers can concurrently attempt to allocate a free record.
*/
-#define PFS_LOCK_FREE 0
+#define PFS_LOCK_FREE 0x00
/**
State of a dirty record.
Values of a dirty record should not be read by a reader,
@@ -41,14 +41,18 @@
Only one writer, the writer which owns the record, should
modify the record content.
*/
-#define PFS_LOCK_DIRTY 1
+#define PFS_LOCK_DIRTY 0x01
/**
State of an allocated record.
Values of an allocated record are safe to read by a reader.
A writer may modify some but not all properties of the record:
only modifying values that can never cause the reader to crash is allowed.
*/
-#define PFS_LOCK_ALLOCATED 2
+#define PFS_LOCK_ALLOCATED 0x02
+
+#define VERSION_MASK 0xFFFFFFFC
+#define STATE_MASK 0x00000003
+#define VERSION_INC 4
/**
A 'lock' protecting performance schema internal buffers.
@@ -60,15 +64,11 @@
struct pfs_lock
{
/**
- The record internal state.
+ The record internal version and state
@sa PFS_LOCK_FREE
@sa PFS_LOCK_DIRTY
@sa PFS_LOCK_ALLOCATED
- */
- volatile int32 m_state;
- /**
- The record internal version number.
- This version number is to transform the 'ABA' problem
+ The version number is to transform the 'ABA' problem
(see http://en.wikipedia.org/wiki/ABA_problem)
into an 'A(n)BA(n + 1)' problem, where 'n' is the m_version number.
When the performance schema instrumentation deletes a record,
@@ -76,21 +76,23 @@ struct pfs_lock
the version number is incremented, so that a reader can detect that
the record was changed. Note that the version number is never
reset to zero when a new record is created.
+ The version number is stored in the high 30 bits.
+ The state is stored in the low 2 bits.
*/
- volatile uint32 m_version;
+ volatile uint32 m_version_state;
/** Returns true if the record is free. */
bool is_free(void)
{
- /* This is a dirty read */
- return (m_state == PFS_LOCK_FREE);
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ return ((copy & STATE_MASK) == PFS_LOCK_FREE);
}
/** Returns true if the record contains values that can be read. */
bool is_populated(void)
{
- int32 copy= m_state; /* non volatile copy, and dirty read */
- return (copy == PFS_LOCK_ALLOCATED);
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ return ((copy & STATE_MASK) == PFS_LOCK_ALLOCATED);
}
/**
@@ -101,10 +103,11 @@ struct pfs_lock
*/
bool free_to_dirty(void)
{
- int32 old_state= PFS_LOCK_FREE;
- int32 new_state= PFS_LOCK_DIRTY;
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ uint32 old_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_DIRTY;
- return (PFS_atomic::cas_32(&m_state, &old_state, new_state));
+ return (PFS_atomic::cas_u32(&m_version_state, &old_val, new_val));
}
/**
@@ -114,8 +117,13 @@ struct pfs_lock
*/
void allocated_to_dirty(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
- PFS_atomic::store_32(&m_state, PFS_LOCK_DIRTY);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was ALLOCATED. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_ALLOCATED);
+ /* Keep the same version, set the DIRTY state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_DIRTY;
+ /* We own the record, no need to use compare and swap. */
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -125,9 +133,26 @@ struct pfs_lock
*/
void dirty_to_allocated(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_DIRTY);
- PFS_atomic::add_u32(&m_version, 1);
- PFS_atomic::store_32(&m_state, PFS_LOCK_ALLOCATED);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was DIRTY. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_DIRTY);
+ /* Increment the version, set the ALLOCATED state */
+ uint32 new_val= (copy & VERSION_MASK) + VERSION_INC + PFS_LOCK_ALLOCATED;
+ PFS_atomic::store_u32(&m_version_state, new_val);
+ }
+
+ /**
+ Initialize a lock to allocated.
+ This transition should be executed by the writer that owns the record and the lock,
+ after the record is in a state ready to be read.
+ */
+ void set_allocated(void)
+ {
+ /* Do not set the version to 0, read the previous value. */
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Increment the version, set the ALLOCATED state */
+ uint32 new_val= (copy & VERSION_MASK) + VERSION_INC + PFS_LOCK_ALLOCATED;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -136,8 +161,12 @@ struct pfs_lock
*/
void dirty_to_free(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_DIRTY);
- PFS_atomic::store_32(&m_state, PFS_LOCK_FREE);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was DIRTY. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_DIRTY);
+ /* Keep the same version, set the FREE state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -153,8 +182,12 @@ struct pfs_lock
The correct assert to use here to guarantee data integrity is simply:
DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
*/
- DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
- PFS_atomic::store_32(&m_state, PFS_LOCK_FREE);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was ALLOCATED. */
+ DBUG_ASSERT(((copy & STATE_MASK) == PFS_LOCK_ALLOCATED));
+ /* Keep the same version, set the FREE state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -163,8 +196,7 @@ struct pfs_lock
*/
void begin_optimistic_lock(struct pfs_lock *copy)
{
- copy->m_version= PFS_atomic::load_u32(&m_version);
- copy->m_state= PFS_atomic::load_32(&m_state);
+ copy->m_version_state= PFS_atomic::load_u32(&m_version_state);
}
/**
@@ -174,19 +206,20 @@ struct pfs_lock
*/
bool end_optimistic_lock(struct pfs_lock *copy)
{
- /*
- return true if:
- - the version + state has not changed
- - and there was valid data to look at
- */
- return ((copy->m_version == PFS_atomic::load_u32(&m_version)) &&
- (copy->m_state == PFS_atomic::load_32(&m_state)) &&
- (copy->m_state == PFS_LOCK_ALLOCATED));
+ /* Check there was valid data to look at. */
+ if ((copy->m_version_state & STATE_MASK) != PFS_LOCK_ALLOCATED)
+ return false;
+
+ /* Check the version + state has not changed. */
+ if (copy->m_version_state != PFS_atomic::load_u32(&m_version_state))
+ return false;
+
+ return true;
}
uint32 get_version()
{
- return PFS_atomic::load_u32(&m_version);
+ return (PFS_atomic::load_u32(&m_version_state) & VERSION_MASK);
}
};
diff --git a/storage/perfschema/pfs_server.cc b/storage/perfschema/pfs_server.cc
index 3df0f27f652..383a46785fb 100644
--- a/storage/perfschema/pfs_server.cc
+++ b/storage/perfschema/pfs_server.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -50,11 +50,16 @@ static void cleanup_performance_schema(void);
void cleanup_instrument_config(void);
struct PSI_bootstrap*
-initialize_performance_schema(const PFS_global_param *param)
+initialize_performance_schema(PFS_global_param *param)
{
pfs_initialized= false;
PFS_table_stat::g_reset_template.reset();
+ global_idle_stat.reset();
+ global_table_io_stat.reset();
+ global_table_lock_stat.reset();
+
+ pfs_automated_sizing(param);
if (! param->m_enabled)
{
diff --git a/storage/perfschema/pfs_server.h b/storage/perfschema/pfs_server.h
index f65febdeb6d..e0c782fde58 100644
--- a/storage/perfschema/pfs_server.h
+++ b/storage/perfschema/pfs_server.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,96 +24,50 @@
#ifndef PFS_MAX_MUTEX_CLASS
#define PFS_MAX_MUTEX_CLASS 200
#endif
-#ifndef PFS_MAX_MUTEX
- #define PFS_MAX_MUTEX 1000000
-#endif
#ifndef PFS_MAX_RWLOCK_CLASS
#define PFS_MAX_RWLOCK_CLASS 30
#endif
-#ifndef PFS_MAX_RWLOCK
- #define PFS_MAX_RWLOCK 1000000
-#endif
#ifndef PFS_MAX_COND_CLASS
#define PFS_MAX_COND_CLASS 80
#endif
-#ifndef PFS_MAX_COND
- #define PFS_MAX_COND 1000
-#endif
#ifndef PFS_MAX_THREAD_CLASS
#define PFS_MAX_THREAD_CLASS 50
#endif
-#ifndef PFS_MAX_THREAD
- #define PFS_MAX_THREAD 1000
-#endif
#ifndef PFS_MAX_FILE_CLASS
#define PFS_MAX_FILE_CLASS 50
#endif
-#ifndef PFS_MAX_FILE
- #define PFS_MAX_FILE 10000
-#endif
#ifndef PFS_MAX_FILE_HANDLE
#define PFS_MAX_FILE_HANDLE 32768
#endif
-#ifndef PFS_MAX_SOCKETS
- #define PFS_MAX_SOCKETS 1000
-#endif
#ifndef PFS_MAX_SOCKET_CLASS
#define PFS_MAX_SOCKET_CLASS 10
#endif
-#ifndef PFS_MAX_TABLE_SHARE
- #define PFS_MAX_TABLE_SHARE 1000
-#endif
-#ifndef PFS_MAX_TABLE
- #define PFS_MAX_TABLE 10000
-#endif
-#ifndef PFS_WAITS_HISTORY_SIZE
- #define PFS_WAITS_HISTORY_SIZE 10
-#endif
-#ifndef PFS_WAITS_HISTORY_LONG_SIZE
- #define PFS_WAITS_HISTORY_LONG_SIZE 10000
-#endif
#ifndef PFS_MAX_SETUP_ACTOR
#define PFS_MAX_SETUP_ACTOR 100
#endif
#ifndef PFS_MAX_SETUP_OBJECT
#define PFS_MAX_SETUP_OBJECT 100
#endif
-#ifndef PFS_MAX_HOST
- #define PFS_MAX_HOST 100
-#endif
-#ifndef PFS_MAX_USER
- #define PFS_MAX_USER 100
-#endif
-#ifndef PFS_MAX_ACCOUNT
- #define PFS_MAX_ACCOUNT 100
-#endif
#ifndef PFS_MAX_STAGE_CLASS
#define PFS_MAX_STAGE_CLASS 150
#endif
-#ifndef PFS_STAGES_HISTORY_SIZE
- #define PFS_STAGES_HISTORY_SIZE 10
-#endif
-#ifndef PFS_STAGES_HISTORY_LONG_SIZE
- #define PFS_STAGES_HISTORY_LONG_SIZE 10000
-#endif
-#ifndef PFS_STATEMENTS_HISTORY_SIZE
- #define PFS_STATEMENTS_HISTORY_SIZE 10
-#endif
-#ifndef PFS_STATEMENTS_HISTORY_LONG_SIZE
- #define PFS_STATEMENTS_HISTORY_LONG_SIZE 10000
-#endif
#ifndef PFS_STATEMENTS_STACK_SIZE
#define PFS_STATEMENTS_STACK_SIZE 10
#endif
-#ifndef PFS_DIGEST_SIZE
- #define PFS_DIGEST_SIZE 200
-#endif
+
+struct PFS_sizing_hints
+{
+ long m_table_definition_cache;
+ long m_table_open_cache;
+ long m_max_connections;
+ long m_open_files_limit;
+};
/** Performance schema global sizing parameters. */
struct PFS_global_param
{
/** True if the performance schema is enabled. */
- bool m_enabled;
+ bool m_enabled;
/** Default values for SETUP_CONSUMERS. */
bool m_consumer_events_stages_current_enabled;
bool m_consumer_events_stages_history_enabled;
@@ -155,7 +109,7 @@ struct PFS_global_param
Maximum number of instrumented table share.
@sa table_share_lost.
*/
- ulong m_table_share_sizing;
+ long m_table_share_sizing;
/**
Maximum number of instrumented file classes.
@sa file_class_lost.
@@ -165,81 +119,86 @@ struct PFS_global_param
Maximum number of instrumented mutex instances.
@sa mutex_lost.
*/
- ulong m_mutex_sizing;
+ long m_mutex_sizing;
/**
Maximum number of instrumented rwlock instances.
@sa rwlock_lost.
*/
- ulong m_rwlock_sizing;
+ long m_rwlock_sizing;
/**
Maximum number of instrumented cond instances.
@sa cond_lost.
*/
- ulong m_cond_sizing;
+ long m_cond_sizing;
/**
Maximum number of instrumented thread instances.
@sa thread_lost.
*/
- ulong m_thread_sizing;
+ long m_thread_sizing;
/**
Maximum number of instrumented table handles.
@sa table_lost.
*/
- ulong m_table_sizing;
+ long m_table_sizing;
/**
Maximum number of instrumented file instances.
@sa file_lost.
*/
- ulong m_file_sizing;
+ long m_file_sizing;
/**
Maximum number of instrumented file handles.
@sa file_handle_lost.
*/
- ulong m_file_handle_sizing;
+ long m_file_handle_sizing;
/**
Maxium number of instrumented socket instances
@sa socket_lost
*/
- ulong m_socket_sizing;
+ long m_socket_sizing;
/**
Maximum number of instrumented socket classes.
@sa socket_class_lost.
*/
ulong m_socket_class_sizing;
/** Maximum number of rows per thread in table EVENTS_WAITS_HISTORY. */
- ulong m_events_waits_history_sizing;
+ long m_events_waits_history_sizing;
/** Maximum number of rows in table EVENTS_WAITS_HISTORY_LONG. */
- ulong m_events_waits_history_long_sizing;
+ long m_events_waits_history_long_sizing;
/** Maximum number of rows in table SETUP_ACTORS. */
ulong m_setup_actor_sizing;
/** Maximum number of rows in table SETUP_OBJECTS. */
ulong m_setup_object_sizing;
/** Maximum number of rows in table HOSTS. */
- ulong m_host_sizing;
+ long m_host_sizing;
/** Maximum number of rows in table USERS. */
- ulong m_user_sizing;
+ long m_user_sizing;
/** Maximum number of rows in table ACCOUNTS. */
- ulong m_account_sizing;
+ long m_account_sizing;
/**
Maximum number of instrumented stage classes.
@sa stage_class_lost.
*/
ulong m_stage_class_sizing;
/** Maximum number of rows per thread in table EVENTS_STAGES_HISTORY. */
- ulong m_events_stages_history_sizing;
+ long m_events_stages_history_sizing;
/** Maximum number of rows in table EVENTS_STAGES_HISTORY_LONG. */
- ulong m_events_stages_history_long_sizing;
+ long m_events_stages_history_long_sizing;
/**
Maximum number of instrumented statement classes.
@sa statement_class_lost.
*/
ulong m_statement_class_sizing;
/** Maximum number of rows per thread in table EVENTS_STATEMENT_HISTORY. */
- ulong m_events_statements_history_sizing;
+ long m_events_statements_history_sizing;
/** Maximum number of rows in table EVENTS_STATEMENTS_HISTORY_LONG. */
- ulong m_events_statements_history_long_sizing;
+ long m_events_statements_history_long_sizing;
/** Maximum number of digests to be captured */
- ulong m_digest_sizing;
+ long m_digest_sizing;
+ /** Maximum number of session attribute strings per thread */
+ long m_session_connect_attrs_sizing;
+
+ /** Sizing hints, for auto tuning. */
+ PFS_sizing_hints m_hints;
};
/**
@@ -254,7 +213,9 @@ extern PFS_global_param pfs_param;
@return A boostrap handle, or NULL.
*/
struct PSI_bootstrap*
-initialize_performance_schema(const PFS_global_param *param);
+initialize_performance_schema(PFS_global_param *param);
+
+void pfs_automated_sizing(PFS_global_param *param);
/**
Initialize the performance schema ACL.
diff --git a/storage/perfschema/pfs_setup_actor.cc b/storage/perfschema/pfs_setup_actor.cc
index a587d3643d2..943654ce1c9 100644
--- a/storage/perfschema/pfs_setup_actor.cc
+++ b/storage/perfschema/pfs_setup_actor.cc
@@ -43,7 +43,7 @@ ulong setup_actor_max;
PFS_setup_actor *setup_actor_array= NULL;
/** Hash table for setup_actor records. */
-static LF_HASH setup_actor_hash;
+LF_HASH setup_actor_hash;
/** True if @c setup_actor_hash is initialized. */
static bool setup_actor_hash_inited= false;
@@ -100,10 +100,11 @@ C_MODE_END
*/
int init_setup_actor_hash(void)
{
- if (! setup_actor_hash_inited)
+ if ((! setup_actor_hash_inited) && (setup_actor_max > 0))
{
lf_hash_init(&setup_actor_hash, sizeof(PFS_setup_actor*), LF_HASH_UNIQUE,
0, 0, setup_actor_hash_get_key, &my_charset_bin);
+ setup_actor_hash.size= setup_actor_max;
setup_actor_hash_inited= true;
}
return 0;
@@ -167,7 +168,7 @@ int insert_setup_actor(const String *user, const String *host, const String *rol
if (unlikely(pins == NULL))
return HA_ERR_OUT_OF_MEM;
- static uint setup_actor_monotonic_index= 0;
+ static uint PFS_ALIGNED setup_actor_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_setup_actor *pfs;
@@ -175,8 +176,7 @@ int insert_setup_actor(const String *user, const String *host, const String *rol
while (++attempts <= setup_actor_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& setup_actor_monotonic_index, 1);
- index= setup_actor_monotonic_index % setup_actor_max;
+ index= PFS_atomic::add_u32(& setup_actor_monotonic_index, 1) % setup_actor_max;
pfs= setup_actor_array + index;
if (pfs->m_lock.is_free())
diff --git a/storage/perfschema/pfs_setup_actor.h b/storage/perfschema/pfs_setup_actor.h
index 8b0ee8a485c..baebd27f0ad 100644
--- a/storage/perfschema/pfs_setup_actor.h
+++ b/storage/perfschema/pfs_setup_actor.h
@@ -49,7 +49,7 @@ struct PFS_setup_actor_key
};
/** A setup_actor record. */
-struct PFS_setup_actor
+struct PFS_ALIGNED PFS_setup_actor
{
/** Internal lock. */
pfs_lock m_lock;
@@ -92,6 +92,8 @@ extern ulong setup_actor_max;
extern PFS_setup_actor *setup_actor_array;
+extern LF_HASH setup_actor_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_setup_object.cc b/storage/perfschema/pfs_setup_object.cc
index a9e9bb7881b..0ca7986e818 100644
--- a/storage/perfschema/pfs_setup_object.cc
+++ b/storage/perfschema/pfs_setup_object.cc
@@ -39,7 +39,7 @@ ulong setup_object_max;
PFS_setup_object *setup_object_array= NULL;
-static LF_HASH setup_object_hash;
+LF_HASH setup_object_hash;
static bool setup_object_hash_inited= false;
/**
@@ -95,10 +95,11 @@ C_MODE_END
*/
int init_setup_object_hash(void)
{
- if (! setup_object_hash_inited)
+ if ((! setup_object_hash_inited) && (setup_object_max > 0))
{
lf_hash_init(&setup_object_hash, sizeof(PFS_setup_object*), LF_HASH_UNIQUE,
0, 0, setup_object_hash_get_key, &my_charset_bin);
+ setup_object_hash.size= setup_object_max;
setup_object_hash_inited= true;
}
return 0;
@@ -161,7 +162,7 @@ int insert_setup_object(enum_object_type object_type, const String *schema,
if (unlikely(pins == NULL))
return HA_ERR_OUT_OF_MEM;
- static uint setup_object_monotonic_index= 0;
+ static uint PFS_ALIGNED setup_object_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_setup_object *pfs;
@@ -169,8 +170,7 @@ int insert_setup_object(enum_object_type object_type, const String *schema,
while (++attempts <= setup_object_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& setup_object_monotonic_index, 1);
- index= setup_object_monotonic_index % setup_object_max;
+ index= PFS_atomic::add_u32(& setup_object_monotonic_index, 1) % setup_object_max;
pfs= setup_object_array + index;
if (pfs->m_lock.is_free())
diff --git a/storage/perfschema/pfs_setup_object.h b/storage/perfschema/pfs_setup_object.h
index 44d2b76c627..2615802fe01 100644
--- a/storage/perfschema/pfs_setup_object.h
+++ b/storage/perfschema/pfs_setup_object.h
@@ -45,7 +45,7 @@ struct PFS_setup_object_key
};
/** A setup_object record. */
-struct PFS_setup_object
+struct PFS_ALIGNED PFS_setup_object
{
enum_object_type get_object_type()
{
@@ -96,6 +96,8 @@ extern ulong setup_object_max;
extern PFS_setup_object *setup_object_array;
+extern LF_HASH setup_object_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_stat.h b/storage/perfschema/pfs_stat.h
index 32c462b8ba2..2a255a9e5b2 100644
--- a/storage/perfschema/pfs_stat.h
+++ b/storage/perfschema/pfs_stat.h
@@ -140,13 +140,90 @@ struct PFS_byte_stat : public PFS_single_stat
}
};
+/** Statistics for mutex usage. */
+struct PFS_mutex_stat
+{
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ Lock statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_lock_stat;
+
+ inline void aggregate(const PFS_mutex_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_lock_stat.aggregate(&stat->m_lock_stat);
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_lock_stat.reset();
+ }
+};
+
+/** Statistics for rwlock usage. */
+struct PFS_rwlock_stat
+{
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ RWLock read lock usage statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_read_lock_stat;
+ /**
+ RWLock write lock usage statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_write_lock_stat;
+
+ inline void aggregate(const PFS_rwlock_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_read_lock_stat.aggregate(&stat->m_read_lock_stat);
+ m_write_lock_stat.aggregate(&stat->m_write_lock_stat);
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_read_lock_stat.reset();
+ m_write_lock_stat.reset();
+ }
+};
+
/** Statistics for COND usage. */
struct PFS_cond_stat
{
- /** Number of times a condition was signalled. */
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ Number of times a condition was signalled.
+ This statistic is not exposed in user visible tables yet.
+ */
ulonglong m_signal_count;
- /** Number of times a condition was broadcasted. */
+ /**
+ Number of times a condition was broadcast.
+ This statistic is not exposed in user visible tables yet.
+ */
ulonglong m_broadcast_count;
+
+ inline void aggregate(const PFS_cond_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_signal_count+= stat->m_signal_count;
+ m_broadcast_count+= stat->m_broadcast_count;
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_signal_count= 0;
+ m_broadcast_count= 0;
+ }
};
/** Statistics for FILE IO. Used for both waits and byte counts. */
@@ -198,6 +275,11 @@ struct PFS_file_stat
/** File IO statistics. */
PFS_file_io_stat m_io_stat;
+ inline void aggregate(const PFS_file_stat *stat)
+ {
+ m_io_stat.aggregate(&stat->m_io_stat);
+ }
+
/** Reset file statistics. */
inline void reset(void)
{
@@ -329,6 +411,7 @@ struct PFS_statement_stat
/** Single table io statistic. */
struct PFS_table_io_stat
{
+ bool m_has_data;
/** FETCH statistics */
PFS_single_stat m_fetch;
/** INSERT statistics */
@@ -338,8 +421,14 @@ struct PFS_table_io_stat
/** DELETE statistics */
PFS_single_stat m_delete;
+ PFS_table_io_stat()
+ {
+ m_has_data= false;
+ }
+
inline void reset(void)
{
+ m_has_data= false;
m_fetch.reset();
m_insert.reset();
m_update.reset();
@@ -348,18 +437,25 @@ struct PFS_table_io_stat
inline void aggregate(const PFS_table_io_stat *stat)
{
- m_fetch.aggregate(&stat->m_fetch);
- m_insert.aggregate(&stat->m_insert);
- m_update.aggregate(&stat->m_update);
- m_delete.aggregate(&stat->m_delete);
+ if (stat->m_has_data)
+ {
+ m_has_data= true;
+ m_fetch.aggregate(&stat->m_fetch);
+ m_insert.aggregate(&stat->m_insert);
+ m_update.aggregate(&stat->m_update);
+ m_delete.aggregate(&stat->m_delete);
+ }
}
inline void sum(PFS_single_stat *result)
{
- result->aggregate(& m_fetch);
- result->aggregate(& m_insert);
- result->aggregate(& m_update);
- result->aggregate(& m_delete);
+ if (m_has_data)
+ {
+ result->aggregate(& m_fetch);
+ result->aggregate(& m_insert);
+ result->aggregate(& m_update);
+ result->aggregate(& m_delete);
+ }
}
};
@@ -419,10 +515,10 @@ struct PFS_table_stat
{
/**
Statistics, per index.
- Each index stat is in [0, MAX_KEY-1],
- stats when using no index are in [MAX_KEY].
+ Each index stat is in [0, MAX_INDEXES-1],
+ stats when using no index are in [MAX_INDEXES].
*/
- PFS_table_io_stat m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat m_index_stat[MAX_INDEXES + 1];
/**
Statistics, per lock type.
@@ -433,7 +529,7 @@ struct PFS_table_stat
inline void reset_io(void)
{
PFS_table_io_stat *stat= & m_index_stat[0];
- PFS_table_io_stat *stat_last= & m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat *stat_last= & m_index_stat[MAX_INDEXES + 1];
for ( ; stat < stat_last ; stat++)
stat->reset();
}
@@ -466,13 +562,25 @@ struct PFS_table_stat
memcpy(this, & g_reset_template, sizeof(*this));
}
- inline void aggregate_io(const PFS_table_stat *stat)
+ inline void aggregate_io(const PFS_table_stat *stat, uint key_count)
{
- PFS_table_io_stat *to_stat= & m_index_stat[0];
- PFS_table_io_stat *to_stat_last= & m_index_stat[MAX_KEY + 1];
- const PFS_table_io_stat *from_stat= & stat->m_index_stat[0];
+ PFS_table_io_stat *to_stat;
+ PFS_table_io_stat *to_stat_last;
+ const PFS_table_io_stat *from_stat;
+
+ DBUG_ASSERT(key_count <= MAX_INDEXES);
+
+ /* Aggregate stats for each index, if any */
+ to_stat= & m_index_stat[0];
+ to_stat_last= to_stat + key_count;
+ from_stat= & stat->m_index_stat[0];
for ( ; to_stat < to_stat_last ; from_stat++, to_stat++)
to_stat->aggregate(from_stat);
+
+ /* Aggregate stats for the table */
+ to_stat= & m_index_stat[MAX_INDEXES];
+ from_stat= & stat->m_index_stat[MAX_INDEXES];
+ to_stat->aggregate(from_stat);
}
inline void aggregate_lock(const PFS_table_stat *stat)
@@ -480,18 +588,27 @@ struct PFS_table_stat
m_lock_stat.aggregate(& stat->m_lock_stat);
}
- inline void aggregate(const PFS_table_stat *stat)
+ inline void aggregate(const PFS_table_stat *stat, uint key_count)
{
- aggregate_io(stat);
+ aggregate_io(stat, key_count);
aggregate_lock(stat);
}
- inline void sum_io(PFS_single_stat *result)
+ inline void sum_io(PFS_single_stat *result, uint key_count)
{
- PFS_table_io_stat *stat= & m_index_stat[0];
- PFS_table_io_stat *stat_last= & m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat *stat;
+ PFS_table_io_stat *stat_last;
+
+ DBUG_ASSERT(key_count <= MAX_INDEXES);
+
+ /* Sum stats for each index, if any */
+ stat= & m_index_stat[0];
+ stat_last= stat + key_count;
for ( ; stat < stat_last ; stat++)
stat->sum(result);
+
+ /* Sum stats for the table */
+ m_index_stat[MAX_INDEXES].sum(result);
}
inline void sum_lock(PFS_single_stat *result)
@@ -499,9 +616,9 @@ struct PFS_table_stat
m_lock_stat.sum(result);
}
- inline void sum(PFS_single_stat *result)
+ inline void sum(PFS_single_stat *result, uint key_count)
{
- sum_io(result);
+ sum_io(result, key_count);
sum_lock(result);
}
diff --git a/storage/perfschema/pfs_timer.cc b/storage/perfschema/pfs_timer.cc
index 3d8d2e07ce5..8c3553db2b2 100644
--- a/storage/perfschema/pfs_timer.cc
+++ b/storage/perfschema/pfs_timer.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -117,6 +117,75 @@ void init_timers(void)
to_pico_data[TIMER_NAME_TICK].m_v0= tick_v0;
to_pico_data[TIMER_NAME_TICK].m_factor= tick_to_pico;
+
+ /*
+ Depending on the platform and build options,
+ some timers may not be available.
+ Pick best replacements.
+ */
+
+ /*
+ For STAGE and STATEMENT, a timer with a fixed frequency is better.
+ The prefered timer is nanosecond, or lower resolutions.
+ */
+
+ if (nanosec_to_pico != 0)
+ {
+ /* Normal case. */
+ stage_timer= TIMER_NAME_NANOSEC;
+ statement_timer= TIMER_NAME_NANOSEC;
+ }
+ else if (microsec_to_pico != 0)
+ {
+ /* Windows. */
+ stage_timer= TIMER_NAME_MICROSEC;
+ statement_timer= TIMER_NAME_MICROSEC;
+ }
+ else if (millisec_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_MILLISEC;
+ statement_timer= TIMER_NAME_MILLISEC;
+ }
+ else if (tick_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_TICK;
+ statement_timer= TIMER_NAME_TICK;
+ }
+ else
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_CYCLE;
+ statement_timer= TIMER_NAME_CYCLE;
+ }
+
+ /*
+ For IDLE, a timer with a fixed frequency is critical,
+ as the CPU clock may slow down a lot if the server is completely idle.
+ The prefered timer is microsecond, or lower resolutions.
+ */
+
+ if (microsec_to_pico != 0)
+ {
+ /* Normal case. */
+ idle_timer= TIMER_NAME_MICROSEC;
+ }
+ else if (millisec_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_MILLISEC;
+ }
+ else if (tick_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_TICK;
+ }
+ else
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_CYCLE;
+ }
}
ulonglong get_timer_raw_value(enum_timer_name timer_name)
diff --git a/storage/perfschema/pfs_user.cc b/storage/perfschema/pfs_user.cc
index d7794a131a1..697b5af2f0d 100644
--- a/storage/perfschema/pfs_user.cc
+++ b/storage/perfschema/pfs_user.cc
@@ -42,7 +42,7 @@ static PFS_single_stat *user_instr_class_waits_array= NULL;
static PFS_stage_stat *user_instr_class_stages_array= NULL;
static PFS_statement_stat *user_instr_class_statements_array= NULL;
-static LF_HASH user_hash;
+LF_HASH user_hash;
static bool user_hash_inited= false;
/**
@@ -146,10 +146,11 @@ C_MODE_END
*/
int init_user_hash(void)
{
- if (! user_hash_inited)
+ if ((! user_hash_inited) && (user_max > 0))
{
lf_hash_init(&user_hash, sizeof(PFS_user*), LF_HASH_UNIQUE,
0, 0, user_hash_get_key, &my_charset_bin);
+ user_hash.size= user_max;
user_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_user.h b/storage/perfschema/pfs_user.h
index 0f937c6c927..dda7e221ca8 100644
--- a/storage/perfschema/pfs_user.h
+++ b/storage/perfschema/pfs_user.h
@@ -44,7 +44,7 @@ struct PFS_user_key
uint m_key_length;
};
-struct PFS_user : public PFS_connection_slice
+struct PFS_ALIGNED PFS_user : public PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -108,6 +108,8 @@ extern ulong user_lost;
extern PFS_user *user_array;
+extern LF_HASH user_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_visitor.cc b/storage/perfschema/pfs_visitor.cc
index fe2b16a2f76..616bc27900a 100644
--- a/storage/perfschema/pfs_visitor.cc
+++ b/storage/perfschema/pfs_visitor.cc
@@ -666,7 +666,7 @@ void PFS_connection_wait_visitor::visit_global()
it is more efficient.
*/
DBUG_ASSERT(m_index == global_idle_class.m_event_name_index);
- m_stat.aggregate(& global_instr_class_waits_array[m_index]);
+ m_stat.aggregate(& global_idle_stat);
}
void PFS_connection_wait_visitor::visit_host(PFS_host *pfs)
@@ -883,54 +883,44 @@ PFS_instance_wait_visitor::PFS_instance_wait_visitor()
PFS_instance_wait_visitor::~PFS_instance_wait_visitor()
{}
-void PFS_instance_wait_visitor::visit_mutex_class(PFS_mutex_class *pfs)
+void PFS_instance_wait_visitor::visit_mutex_class(PFS_mutex_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_mutex_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_rwlock_class(PFS_rwlock_class *pfs)
+void PFS_instance_wait_visitor::visit_rwlock_class(PFS_rwlock_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_rwlock_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_cond_class(PFS_cond_class *pfs)
+void PFS_instance_wait_visitor::visit_cond_class(PFS_cond_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_cond_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_file_class(PFS_file_class *pfs)
+void PFS_instance_wait_visitor::visit_file_class(PFS_file_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ pfs->m_file_stat.m_io_stat.sum_waits(&m_stat);
}
-void PFS_instance_wait_visitor::visit_socket_class(PFS_socket_class *pfs)
+void PFS_instance_wait_visitor::visit_socket_class(PFS_socket_class *pfs)
{
- /* Collect global wait stats */
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(&global_instr_class_waits_array[index]);
-
- /* If deferred, then pull wait stats directly from the socket class. */
- if (pfs->is_deferred())
- pfs->m_socket_stat.m_io_stat.sum_waits(&m_stat);
+ pfs->m_socket_stat.m_io_stat.sum_waits(&m_stat);
}
-void PFS_instance_wait_visitor::visit_mutex(PFS_mutex *pfs)
+void PFS_instance_wait_visitor::visit_mutex(PFS_mutex *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_mutex_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_rwlock(PFS_rwlock *pfs)
+void PFS_instance_wait_visitor::visit_rwlock(PFS_rwlock *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_rwlock_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_cond(PFS_cond *pfs)
+void PFS_instance_wait_visitor::visit_cond(PFS_cond *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_cond_stat.m_wait_stat);
}
void PFS_instance_wait_visitor::visit_file(PFS_file *pfs)
@@ -959,23 +949,24 @@ PFS_object_wait_visitor::~PFS_object_wait_visitor()
void PFS_object_wait_visitor::visit_global()
{
- uint index;
-
- index= global_table_io_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
-
- index= global_table_lock_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_io_stat.sum(& m_stat);
+ global_table_lock_stat.sum(& m_stat);
}
void PFS_object_wait_visitor::visit_table_share(PFS_table_share *pfs)
{
- pfs->m_table_stat.sum(& m_stat);
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
+ pfs->m_table_stat.sum(& m_stat, safe_key_count);
}
void PFS_object_wait_visitor::visit_table(PFS_table *pfs)
{
- pfs->m_table_stat.sum(& m_stat);
+ PFS_table_share *table_share= sanitize_table_share(pfs->m_share);
+ if (table_share != NULL)
+ {
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ pfs->m_table_stat.sum(& m_stat, safe_key_count);
+ }
}
PFS_table_io_wait_visitor::PFS_table_io_wait_visitor()
@@ -986,21 +977,21 @@ PFS_table_io_wait_visitor::~PFS_table_io_wait_visitor()
void PFS_table_io_wait_visitor::visit_global()
{
- uint index= global_table_io_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_io_stat.sum(& m_stat);
}
void PFS_table_io_wait_visitor::visit_table_share(PFS_table_share *pfs)
{
PFS_table_io_stat io_stat;
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < pfs->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
io_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
io_stat.sum(& m_stat);
}
@@ -1012,14 +1003,15 @@ void PFS_table_io_wait_visitor::visit_table(PFS_table *pfs)
if (likely(safe_share != NULL))
{
PFS_table_io_stat io_stat;
+ uint safe_key_count= sanitize_index_count(safe_share->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < safe_share->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
io_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
io_stat.sum(& m_stat);
}
@@ -1035,14 +1027,15 @@ PFS_table_io_stat_visitor::~PFS_table_io_stat_visitor()
void PFS_table_io_stat_visitor::visit_table_share(PFS_table_share *pfs)
{
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < pfs->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
m_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
}
void PFS_table_io_stat_visitor::visit_table(PFS_table *pfs)
@@ -1051,14 +1044,15 @@ void PFS_table_io_stat_visitor::visit_table(PFS_table *pfs)
if (likely(safe_share != NULL))
{
+ uint safe_key_count= sanitize_index_count(safe_share->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < safe_share->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
m_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
}
}
@@ -1090,8 +1084,7 @@ PFS_table_lock_wait_visitor::~PFS_table_lock_wait_visitor()
void PFS_table_lock_wait_visitor::visit_global()
{
- uint index= global_table_lock_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_lock_stat.sum(& m_stat);
}
void PFS_table_lock_wait_visitor::visit_table_share(PFS_table_share *pfs)
diff --git a/storage/perfschema/table_esgs_by_thread_by_event_name.cc b/storage/perfschema/table_esgs_by_thread_by_event_name.cc
index 2a69ec24277..eeef6c3fbb2 100644
--- a/storage/perfschema/table_esgs_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_esgs_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -212,7 +212,7 @@ int table_esgs_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_esgs_by_thread_by_event_name.h b/storage/perfschema/table_esgs_by_thread_by_event_name.h
index 049c8997396..5295a9eacdf 100644
--- a/storage/perfschema/table_esgs_by_thread_by_event_name.h
+++ b/storage/perfschema/table_esgs_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_esgs_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_esgs_global_by_event_name.cc b/storage/perfschema/table_esgs_global_by_event_name.cc
index 2ac22fb1551..276ac8d7704 100644
--- a/storage/perfschema/table_esgs_global_by_event_name.cc
+++ b/storage/perfschema/table_esgs_global_by_event_name.cc
@@ -95,6 +95,9 @@ int
table_esgs_global_by_event_name::delete_all_rows(void)
{
reset_events_stages_by_thread();
+ reset_events_stages_by_account();
+ reset_events_stages_by_user();
+ reset_events_stages_by_host();
reset_events_stages_global();
return 0;
}
diff --git a/storage/perfschema/table_esms_by_digest.cc b/storage/perfschema/table_esms_by_digest.cc
index dac8d3b01dc..d0250c14e5d 100644
--- a/storage/perfschema/table_esms_by_digest.cc
+++ b/storage/perfschema/table_esms_by_digest.cc
@@ -36,6 +36,11 @@ THR_LOCK table_esms_by_digest::m_table_lock;
static const TABLE_FIELD_TYPE field_types[]=
{
{
+ { C_STRING_WITH_LEN("SCHEMA_NAME") },
+ { C_STRING_WITH_LEN("varchar(64)") },
+ { NULL, 0}
+ },
+ {
{ C_STRING_WITH_LEN("DIGEST") },
{ C_STRING_WITH_LEN("varchar(32)") },
{ NULL, 0}
@@ -45,7 +50,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{ C_STRING_WITH_LEN("longtext") },
{ NULL, 0}
},
- {
+ {
{ C_STRING_WITH_LEN("COUNT_STAR") },
{ C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
@@ -170,7 +175,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{ C_STRING_WITH_LEN("timestamp") },
{ NULL, 0}
},
- {
+ {
{ C_STRING_WITH_LEN("LAST_SEEN") },
{ C_STRING_WITH_LEN("timestamp") },
{ NULL, 0}
@@ -179,7 +184,7 @@ static const TABLE_FIELD_TYPE field_types[]=
TABLE_FIELD_DEF
table_esms_by_digest::m_field_def=
-{ 28, field_types };
+{ 29, field_types };
PFS_engine_table_share
table_esms_by_digest::m_share=
@@ -303,18 +308,19 @@ int table_esms_by_digest
{
switch(f->field_index)
{
- case 0: /* DIGEST */
- case 1: /* DIGEST_TEXT */
+ case 0: /* SCHEMA_NAME */
+ case 1: /* DIGEST */
+ case 2: /* DIGEST_TEXT */
m_row.m_digest.set_field(f->field_index, f);
break;
- case 26: /* FIRST_SEEN */
+ case 27: /* FIRST_SEEN */
set_field_timestamp(f, m_row.m_first_seen);
break;
- case 27: /* LAST_SEEN */
+ case 28: /* LAST_SEEN */
set_field_timestamp(f, m_row.m_last_seen);
break;
- default: /* 1, ... COUNT/SUM/MIN/AVG/MAX */
- m_row.m_stat.set_field(f->field_index - 2, f);
+ default: /* 3, ... COUNT/SUM/MIN/AVG/MAX */
+ m_row.m_stat.set_field(f->field_index - 3, f);
break;
}
}
diff --git a/storage/perfschema/table_esms_by_thread_by_event_name.cc b/storage/perfschema/table_esms_by_thread_by_event_name.cc
index 5a7faca1b79..fccdf5dea60 100644
--- a/storage/perfschema/table_esms_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_esms_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -308,7 +308,7 @@ int table_esms_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_esms_by_thread_by_event_name.h b/storage/perfschema/table_esms_by_thread_by_event_name.h
index 2f36606a5e1..9fb9f7c58dc 100644
--- a/storage/perfschema/table_esms_by_thread_by_event_name.h
+++ b/storage/perfschema/table_esms_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_esms_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_esms_global_by_event_name.cc b/storage/perfschema/table_esms_global_by_event_name.cc
index 22c87f09137..efcb5b6fa7c 100644
--- a/storage/perfschema/table_esms_global_by_event_name.cc
+++ b/storage/perfschema/table_esms_global_by_event_name.cc
@@ -190,6 +190,9 @@ int
table_esms_global_by_event_name::delete_all_rows(void)
{
reset_events_statements_by_thread();
+ reset_events_statements_by_account();
+ reset_events_statements_by_user();
+ reset_events_statements_by_host();
reset_events_statements_global();
return 0;
}
diff --git a/storage/perfschema/table_events_stages.cc b/storage/perfschema/table_events_stages.cc
index e438249fbd3..854e1be15cd 100644
--- a/storage/perfschema/table_events_stages.cc
+++ b/storage/perfschema/table_events_stages.cc
@@ -32,7 +32,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -207,7 +207,7 @@ int table_events_stages_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_stages.h b/storage/perfschema/table_events_stages.h
index 6bc712c15a5..09c555c80fd 100644
--- a/storage/perfschema/table_events_stages.h
+++ b/storage/perfschema/table_events_stages.h
@@ -36,7 +36,7 @@ struct PFS_thread;
struct row_events_stages
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_statements.cc b/storage/perfschema/table_events_statements.cc
index d453b14470f..fb2b4b242d4 100644
--- a/storage/perfschema/table_events_statements.cc
+++ b/storage/perfschema/table_events_statements.cc
@@ -35,7 +35,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -372,7 +372,7 @@ void table_events_statements_common::make_row(PFS_events_statements *statement)
PSI_digest_storage *digest= & statement->m_digest_storage;
if (digest->m_byte_count > 0)
{
- PFS_digest_hash md5;
+ PFS_digest_key md5;
compute_md5_hash((char *) md5.m_md5,
(char *) digest->m_token_array,
digest->m_byte_count);
@@ -420,7 +420,7 @@ int table_events_statements_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_statements.h b/storage/perfschema/table_events_statements.h
index acd82de4fcf..dcc6611f555 100644
--- a/storage/perfschema/table_events_statements.h
+++ b/storage/perfschema/table_events_statements.h
@@ -37,7 +37,7 @@ struct PFS_thread;
struct row_events_statements
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_waits.cc b/storage/perfschema/table_events_waits.cc
index d1c82e81f75..82d8ba2a0cc 100644
--- a/storage/perfschema/table_events_waits.cc
+++ b/storage/perfschema/table_events_waits.cc
@@ -34,7 +34,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -239,7 +239,8 @@ int table_events_waits_common::make_table_object_columns(volatile PFS_events_wai
/* INDEX NAME */
safe_index= wait->m_index;
- if (safe_index < MAX_KEY && safe_index < safe_table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(safe_table_share->m_key_count);
+ if (safe_index < safe_key_count)
{
PFS_table_key *key= & safe_table_share->m_keys[safe_index];
m_row.m_index_name_length= key->m_name_length;
@@ -602,7 +603,7 @@ int table_events_waits_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_waits.h b/storage/perfschema/table_events_waits.h
index 72065c765ca..065bf95e5a6 100644
--- a/storage/perfschema/table_events_waits.h
+++ b/storage/perfschema/table_events_waits.h
@@ -36,7 +36,7 @@ struct PFS_thread;
struct row_events_waits
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_waits_summary.cc b/storage/perfschema/table_events_waits_summary.cc
index 2a144a07344..f437e83f3ff 100644
--- a/storage/perfschema/table_events_waits_summary.cc
+++ b/storage/perfschema/table_events_waits_summary.cc
@@ -139,7 +139,7 @@ void table_events_waits_summary_by_instance::make_mutex_row(PFS_mutex *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_mutex_stat.m_wait_stat);
}
/**
@@ -153,7 +153,7 @@ void table_events_waits_summary_by_instance::make_rwlock_row(PFS_rwlock *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_rwlock_stat.m_wait_stat);
}
/**
@@ -167,7 +167,7 @@ void table_events_waits_summary_by_instance::make_cond_row(PFS_cond *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_cond_stat.m_wait_stat);
}
/**
@@ -181,11 +181,13 @@ void table_events_waits_summary_by_instance::make_file_row(PFS_file *pfs)
if (unlikely(safe_class == NULL))
return;
+ PFS_single_stat sum;
+ pfs->m_file_stat.m_io_stat.sum_waits(& sum);
/*
Files don't have a in memory structure associated to it,
so we use the address of the PFS_file buffer as object_instance_begin
*/
- make_instr_row(pfs, safe_class, pfs, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs, & sum);
}
/**
diff --git a/storage/perfschema/table_ews_by_thread_by_event_name.cc b/storage/perfschema/table_ews_by_thread_by_event_name.cc
index 25e3cf395c4..4db97b1c98c 100644
--- a/storage/perfschema/table_ews_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_ews_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -282,7 +282,7 @@ int table_ews_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_ews_by_thread_by_event_name.h b/storage/perfschema/table_ews_by_thread_by_event_name.h
index b0710bb8a57..989356be646 100644
--- a/storage/perfschema/table_ews_by_thread_by_event_name.h
+++ b/storage/perfschema/table_ews_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_ews_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_ews_global_by_event_name.cc b/storage/perfschema/table_ews_global_by_event_name.cc
index c71a1ed479e..1e165c36bc5 100644
--- a/storage/perfschema/table_ews_global_by_event_name.cc
+++ b/storage/perfschema/table_ews_global_by_event_name.cc
@@ -97,7 +97,7 @@ table_ews_global_by_event_name::delete_all_rows(void)
reset_events_waits_by_instance();
reset_table_waits_by_table_handle();
reset_table_waits_by_table();
- reset_events_waits_global();
+ reset_events_waits_by_class();
return 0;
}
@@ -121,9 +121,6 @@ int table_ews_global_by_event_name::rnd_next(void)
PFS_socket_class *socket_class;
PFS_instr_class *instr_class;
- if (global_instr_class_waits_array == NULL)
- return HA_ERR_END_OF_FILE;
-
for (m_pos.set_at(&m_next_pos);
m_pos.has_more_view();
m_pos.next_view())
@@ -218,9 +215,6 @@ table_ews_global_by_event_name::rnd_pos(const void *pos)
set_position(pos);
- if (global_instr_class_waits_array == NULL)
- return HA_ERR_END_OF_FILE;
-
switch (m_pos.m_index_1)
{
case pos_ews_global_by_event_name::VIEW_MUTEX:
diff --git a/storage/perfschema/table_helper.cc b/storage/perfschema/table_helper.cc
index d3954179539..9f803434ab6 100644
--- a/storage/perfschema/table_helper.cc
+++ b/storage/perfschema/table_helper.cc
@@ -110,26 +110,30 @@ int PFS_digest_row::make_row(PFS_statements_digest_stat* pfs)
*/
if (pfs->m_digest_storage.m_byte_count != 0)
{
+ m_schema_name_length= pfs->m_digest_key.m_schema_name_length;
+ if (m_schema_name_length > 0)
+ memcpy(m_schema_name, pfs->m_digest_key.m_schema_name, m_schema_name_length);
/*
Calculate digest from MD5 HASH collected to be shown as
DIGEST in this row.
*/
- MD5_HASH_TO_STRING(pfs->m_digest_hash.m_md5, m_digest);
+ MD5_HASH_TO_STRING(pfs->m_digest_key.m_md5, m_digest);
m_digest_length= MD5_HASH_TO_STRING_LENGTH;
- /*
- Caclulate digest_text information from the token array collected
+ /*
+ Calculate digest_text information from the token array collected
to be shown as DIGEST_TEXT column.
- */
+ */
get_digest_text(m_digest_text, &pfs->m_digest_storage);
m_digest_text_length= strlen(m_digest_text);
}
else
{
+ m_schema_name_length= 0;
m_digest_length= 0;
m_digest_text_length= 0;
}
-
+
return 0;
}
@@ -137,14 +141,21 @@ void PFS_digest_row::set_field(uint index, Field *f)
{
switch (index)
{
- case 0: /* DIGEST */
+ case 0: /* SCHEMA_NAME */
+ if (m_schema_name_length > 0)
+ PFS_engine_table::set_field_varchar_utf8(f, m_schema_name,
+ m_schema_name_length);
+ else
+ f->set_null();
+ break;
+ case 1: /* DIGEST */
if (m_digest_length > 0)
PFS_engine_table::set_field_varchar_utf8(f, m_digest,
m_digest_length);
else
f->set_null();
break;
- case 1: /* DIGEST_TEXT */
+ case 2: /* DIGEST_TEXT */
if (m_digest_text_length > 0)
PFS_engine_table::set_field_longtext_utf8(f, m_digest_text,
m_digest_text_length);
@@ -199,7 +210,7 @@ int PFS_index_row::make_row(PFS_table_share *pfs, uint table_index)
if (m_object_row.make_row(pfs))
return 1;
- if (table_index < MAX_KEY)
+ if (table_index < MAX_INDEXES)
{
PFS_table_key *key= &pfs->m_keys[table_index];
m_index_name_length= key->m_name_length;
diff --git a/storage/perfschema/table_helper.h b/storage/perfschema/table_helper.h
index 798ff16f4e5..769122570eb 100644
--- a/storage/perfschema/table_helper.h
+++ b/storage/perfschema/table_helper.h
@@ -127,6 +127,10 @@ struct PFS_account_row
/** Row fragment for columns DIGEST, DIGEST_TEXT. */
struct PFS_digest_row
{
+ /** Column SCHEMA_NAME. */
+ char m_schema_name[NAME_LEN];
+ /** Length in bytes of @c m_schema_name. */
+ uint m_schema_name_length;
/** Column DIGEST. */
char m_digest[COL_DIGEST_SIZE];
/** Length in bytes of @c m_digest. */
diff --git a/storage/perfschema/table_host_cache.cc b/storage/perfschema/table_host_cache.cc
index d243204ddcd..9c44a1fe235 100644
--- a/storage/perfschema/table_host_cache.cc
+++ b/storage/perfschema/table_host_cache.cc
@@ -266,9 +266,6 @@ void table_host_cache::materialize(THD *thd)
index++;
row++;
current= current->next();
- /* Host cache is a circular linked list. */
- if (current == first)
- break;
}
m_all_rows= rows;
diff --git a/storage/perfschema/table_os_global_by_type.cc b/storage/perfschema/table_os_global_by_type.cc
index 82d176cd5b2..70d9d6819ac 100644
--- a/storage/perfschema/table_os_global_by_type.cc
+++ b/storage/perfschema/table_os_global_by_type.cc
@@ -174,6 +174,7 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
{
pfs_lock lock;
PFS_single_stat cumulated_stat;
+ uint safe_key_count;
m_row_exists= false;
@@ -184,7 +185,11 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
m_row.m_schema_name_length= share->m_schema_name_length;
memcpy(m_row.m_object_name, share->m_table_name, share->m_table_name_length);
m_row.m_object_name_length= share->m_table_name_length;
- share->m_table_stat.sum(& cumulated_stat);
+
+ /* This is a dirty read, some thread can write data while we are reading it */
+ safe_key_count= sanitize_index_count(share->m_key_count);
+
+ share->m_table_stat.sum(& cumulated_stat, safe_key_count);
if (! share->m_lock.end_optimistic_lock(&lock))
return;
@@ -204,7 +209,7 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
If the opened table handle is for this table share,
aggregate the table handle statistics.
*/
- table->m_table_stat.sum(& cumulated_stat);
+ table->m_table_stat.sum(& cumulated_stat, safe_key_count);
}
}
}
diff --git a/storage/perfschema/table_session_account_connect_attrs.cc b/storage/perfschema/table_session_account_connect_attrs.cc
new file mode 100644
index 00000000000..4a3fcc22341
--- /dev/null
+++ b/storage/perfschema/table_session_account_connect_attrs.cc
@@ -0,0 +1,70 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_account_connect_attrs.h"
+
+THR_LOCK table_session_account_connect_attrs::m_table_lock;
+
+PFS_engine_table_share
+table_session_account_connect_attrs::m_share=
+{
+ { C_STRING_WITH_LEN("session_account_connect_attrs") },
+ &pfs_readonly_acl,
+ &table_session_account_connect_attrs::create,
+ NULL, /* write_row */
+ NULL, /* delete_all_rows */
+ NULL, /* get_row_count */
+ 1000, /* records */
+ sizeof(pos_connect_attr_by_thread_by_attr), /* ref length */
+ &m_table_lock,
+ &m_field_def,
+ false /* checked */
+};
+
+PFS_engine_table* table_session_account_connect_attrs::create()
+{
+ return new table_session_account_connect_attrs();
+}
+
+table_session_account_connect_attrs::table_session_account_connect_attrs()
+ : table_session_connect(&m_share)
+{}
+
+bool
+table_session_account_connect_attrs::thread_fits(PFS_thread *thread)
+{
+ PFS_thread *current_thread= PFS_thread::get_current_thread();
+ /* The current thread may not have instrumentation attached. */
+ if (current_thread == NULL)
+ return false;
+
+ /* The thread we compare to, by definition, has some instrumentation. */
+ DBUG_ASSERT(thread != NULL);
+
+ uint username_length= current_thread->m_username_length;
+ uint hostname_length= current_thread->m_hostname_length;
+
+ if ( (thread->m_username_length != username_length)
+ || (thread->m_hostname_length != hostname_length))
+ return false;
+
+ if (memcmp(thread->m_username, current_thread->m_username, username_length) != 0)
+ return false;
+
+ if (memcmp(thread->m_hostname, current_thread->m_hostname, hostname_length) != 0)
+ return false;
+
+ return true;
+}
diff --git a/storage/perfschema/table_session_account_connect_attrs.h b/storage/perfschema/table_session_account_connect_attrs.h
new file mode 100644
index 00000000000..ba8893e7cad
--- /dev/null
+++ b/storage/perfschema/table_session_account_connect_attrs.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_ACCOUNT_CONNECT_ATTRS_H
+#define TABLE_SESSION_ACCOUNT_CONNECT_ATTRS_H
+
+#include "table_session_connect.h"
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+/** Table PERFORMANCE_SCHEMA.SESSION_ACCOUNT_CONNECT_ATTRS. */
+class table_session_account_connect_attrs : public table_session_connect
+{
+public:
+ /** Table share */
+ static PFS_engine_table_share m_share;
+ /** Table builder */
+ static PFS_engine_table* create();
+
+protected:
+ table_session_account_connect_attrs();
+
+public:
+ ~table_session_account_connect_attrs()
+ {}
+
+protected:
+ virtual bool thread_fits(PFS_thread *thread);
+
+private:
+ /** Table share lock. */
+ static THR_LOCK m_table_lock;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_session_connect.cc b/storage/perfschema/table_session_connect.cc
new file mode 100644
index 00000000000..bd905b5756c
--- /dev/null
+++ b/storage/perfschema/table_session_connect.cc
@@ -0,0 +1,268 @@
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_connect.h"
+
+static const TABLE_FIELD_TYPE field_types[]=
+{
+ {
+ { C_STRING_WITH_LEN("PROCESSLIST_ID") },
+ { C_STRING_WITH_LEN("int(11)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ATTR_NAME") },
+ { C_STRING_WITH_LEN("varchar(32)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ATTR_VALUE") },
+ { C_STRING_WITH_LEN("varchar(1024)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ORDINAL_POSITION") },
+ { C_STRING_WITH_LEN("int(11)") },
+ { NULL, 0}
+ }
+};
+
+TABLE_FIELD_DEF table_session_connect::m_field_def=
+{ 4, field_types };
+
+table_session_connect::table_session_connect(const PFS_engine_table_share *share) :
+ cursor_by_thread_connect_attr(share)
+{}
+
+/**
+ Take a length encoded string
+
+ @arg ptr inout the input string array
+ @arg dest where to store the result
+ @arg dest_size max size of @c dest
+ @arg copied_len the actual length of the data copied
+ @arg start_ptr pointer to the start of input
+ @arg input_length the length of the incoming data
+ @arg copy_data copy the data or just skip the input
+ @arg from_cs character set in which @c ptr is encoded
+ @arg nchars_max maximum number of characters to read
+ @return status
+ @retval true parsing failed
+ @retval false parsing succeeded
+*/
+bool parse_length_encoded_string(const char **ptr,
+ char *dest, uint dest_size,
+ uint *copied_len,
+ const char *start_ptr, uint input_length,
+ bool copy_data,
+ const CHARSET_INFO *from_cs,
+ uint nchars_max)
+{
+ ulong copy_length, data_length;
+ const char *well_formed_error_pos= NULL, *cannot_convert_error_pos= NULL,
+ *from_end_pos= NULL;
+
+ copy_length= data_length= net_field_length((uchar **) ptr);
+
+ /* we don't tolerate NULL as a length */
+ if (data_length == NULL_LENGTH)
+ return true;
+
+ if (*ptr - start_ptr + data_length > input_length)
+ return true;
+
+ copy_length= well_formed_copy_nchars(&my_charset_utf8_bin, dest, dest_size,
+ from_cs, *ptr, data_length, nchars_max,
+ &well_formed_error_pos,
+ &cannot_convert_error_pos,
+ &from_end_pos);
+ *copied_len= copy_length;
+ (*ptr)+= data_length;
+
+ return false;
+}
+
+/**
+ Take the nth attribute name/value pair
+
+  Parse the attributes blob from the beginning, skipping the attributes
+ whose number is lower than the one we seek.
+  When we reach the attribute at the index we're looking for, the values
+  are copied to the output parameters.
+ If parsing fails or no more attributes are found the function stops
+ and returns an error code.
+
+ @arg connect_attrs pointer to the connect attributes blob
+ @arg connect_attrs_length length of @c connect_attrs
+ @arg connect_attrs_cs character set used to encode @c connect_attrs
+ @arg ordinal index of the attribute we need
+ @arg attr_name [out] buffer to receive the attribute name
+ @arg max_attr_name max size of @c attr_name in bytes
+  @arg attr_name_length [out] number of bytes written in @c attr_name
+ @arg attr_value [out] buffer to receive the attribute name
+ @arg max_attr_value max size of @c attr_value in bytes
+  @arg attr_value_length [out] number of bytes written in @c attr_value
+ @return status
+ @retval true requested attribute pair is found and copied
+ @retval false error. Either because of parsing or too few attributes.
+*/
+bool read_nth_attr(const char *connect_attrs,
+ uint connect_attrs_length,
+ const CHARSET_INFO *connect_attrs_cs,
+ uint ordinal,
+ char *attr_name, uint max_attr_name,
+ uint *attr_name_length,
+ char *attr_value, uint max_attr_value,
+ uint *attr_value_length)
+{
+ uint idx;
+ const char *ptr;
+
+ for (ptr= connect_attrs, idx= 0;
+ (uint)(ptr - connect_attrs) < connect_attrs_length && idx <= ordinal;
+ idx++)
+ {
+ uint copy_length;
+ /* do the copying only if we absolutely have to */
+ bool fill_in_attr_name= idx == ordinal;
+ bool fill_in_attr_value= idx == ordinal;
+
+ /* read the key */
+ if (parse_length_encoded_string(&ptr,
+ attr_name, max_attr_name, &copy_length,
+ connect_attrs,
+ connect_attrs_length,
+ fill_in_attr_name,
+ connect_attrs_cs, 32) ||
+ !copy_length
+ )
+ return false;
+
+ if (idx == ordinal)
+ *attr_name_length= copy_length;
+
+ /* read the value */
+ if (parse_length_encoded_string(&ptr,
+ attr_value, max_attr_value, &copy_length,
+ connect_attrs,
+ connect_attrs_length,
+ fill_in_attr_value,
+ connect_attrs_cs, 1024))
+ return false;
+
+ if (idx == ordinal)
+ *attr_value_length= copy_length;
+
+ if (idx == ordinal)
+ return true;
+ }
+
+ return false;
+}
+
+void table_session_connect::make_row(PFS_thread *pfs, uint ordinal)
+{
+ pfs_lock lock;
+ PFS_thread_class *safe_class;
+
+ m_row_exists= false;
+
+ /* Protect this reader against thread termination */
+ pfs->m_lock.begin_optimistic_lock(&lock);
+ safe_class= sanitize_thread_class(pfs->m_class);
+ if (unlikely(safe_class == NULL))
+ return;
+
+ /* Filtering threads must be done under the protection of the optimistic lock. */
+ if (! thread_fits(pfs))
+ return;
+
+ /* populate the row */
+ if (read_nth_attr(pfs->m_session_connect_attrs,
+ pfs->m_session_connect_attrs_length,
+ pfs->m_session_connect_attrs_cs,
+ ordinal,
+ m_row.m_attr_name, (uint) sizeof(m_row.m_attr_name),
+ &m_row.m_attr_name_length,
+ m_row.m_attr_value, (uint) sizeof(m_row.m_attr_value),
+ &m_row.m_attr_value_length))
+ {
+ /* we don't expect internal threads to have connection attributes */
+ DBUG_ASSERT(pfs->m_processlist_id != 0);
+
+ m_row.m_ordinal_position= ordinal;
+ m_row.m_process_id= pfs->m_processlist_id;
+ }
+ else
+ return;
+
+ if (pfs->m_lock.end_optimistic_lock(& lock))
+ m_row_exists= true;
+}
+
+int table_session_connect::read_row_values(TABLE *table,
+ unsigned char *buf,
+ Field **fields,
+ bool read_all)
+{
+ Field *f;
+
+ if (unlikely(!m_row_exists))
+ return HA_ERR_RECORD_DELETED;
+
+ /* Set the null bits */
+ DBUG_ASSERT(table->s->null_bytes == 1);
+ buf[0]= 0;
+
+ for (; (f= *fields) ; fields++)
+ {
+ if (read_all || bitmap_is_set(table->read_set, f->field_index))
+ {
+ switch(f->field_index)
+ {
+ case FO_PROCESS_ID:
+ if (m_row.m_process_id != 0)
+ set_field_ulong(f, m_row.m_process_id);
+ else
+ f->set_null();
+ break;
+ case FO_ATTR_NAME:
+ set_field_varchar_utf8(f, m_row.m_attr_name,
+ m_row.m_attr_name_length);
+ break;
+ case FO_ATTR_VALUE:
+ if (m_row.m_attr_value_length)
+ set_field_varchar_utf8(f, m_row.m_attr_value,
+ m_row.m_attr_value_length);
+ else
+ f->set_null();
+ break;
+ case FO_ORDINAL_POSITION:
+ set_field_ulong(f, m_row.m_ordinal_position);
+ break;
+ default:
+ DBUG_ASSERT(false);
+ }
+ }
+ }
+ return 0;
+}
+
+bool
+table_session_connect::thread_fits(PFS_thread *thread)
+{
+ return true;
+}
+
diff --git a/storage/perfschema/table_session_connect.h b/storage/perfschema/table_session_connect.h
new file mode 100644
index 00000000000..097623d2c80
--- /dev/null
+++ b/storage/perfschema/table_session_connect.h
@@ -0,0 +1,77 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_CONNECT_H
+#define TABLE_SESSION_CONNECT_H
+
+#include "pfs_column_types.h"
+#include "cursor_by_thread_connect_attr.h"
+#include "table_helper.h"
+
+#define MAX_ATTR_NAME_CHARS 32
+#define MAX_ATTR_VALUE_CHARS 1024
+#define MAX_UTF8_BYTES 6
+
+/** symbolic names for field offsets, keep in sync with field_types */
+enum field_offsets {
+ FO_PROCESS_ID,
+ FO_ATTR_NAME,
+ FO_ATTR_VALUE,
+ FO_ORDINAL_POSITION
+};
+
+/**
+ A row of PERFORMANCE_SCHEMA.SESSION_CONNECT_ATTRS and
+ PERFORMANCE_SCHEMA.SESSION_ACCOUNT_CONNECT_ATTRS.
+*/
+struct row_session_connect_attrs
+{
+ /** Column PROCESS_ID. */
+ ulong m_process_id;
+ /** Column ATTR_NAME. In UTF-8 */
+ char m_attr_name[MAX_ATTR_NAME_CHARS * MAX_UTF8_BYTES];
+ /** Length in bytes of @c m_attr_name. */
+ uint m_attr_name_length;
+ /** Column ATTR_VALUE. In UTF-8 */
+ char m_attr_value[MAX_ATTR_VALUE_CHARS * MAX_UTF8_BYTES];
+  /** Length in bytes of @c m_attr_value. */
+ uint m_attr_value_length;
+ /** Column ORDINAL_POSITION. */
+ ulong m_ordinal_position;
+};
+
+class table_session_connect : public cursor_by_thread_connect_attr
+{
+protected:
+ table_session_connect(const PFS_engine_table_share *share);
+
+public:
+ ~table_session_connect()
+ {}
+
+protected:
+ virtual void make_row(PFS_thread *pfs, uint ordinal);
+ virtual bool thread_fits(PFS_thread *thread);
+ virtual int read_row_values(TABLE *table, unsigned char *buf,
+ Field **fields, bool read_all);
+protected:
+ /** Fields definition. */
+ static TABLE_FIELD_DEF m_field_def;
+ /** Current row. */
+ row_session_connect_attrs m_row;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_session_connect_attrs.cc b/storage/perfschema/table_session_connect_attrs.cc
new file mode 100644
index 00000000000..9e1804b7294
--- /dev/null
+++ b/storage/perfschema/table_session_connect_attrs.cc
@@ -0,0 +1,43 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_connect_attrs.h"
+
+THR_LOCK table_session_connect_attrs::m_table_lock;
+
+PFS_engine_table_share
+table_session_connect_attrs::m_share=
+{
+ { C_STRING_WITH_LEN("session_connect_attrs") },
+ &pfs_readonly_acl,
+ &table_session_connect_attrs::create,
+ NULL, /* write_row */
+ NULL, /* delete_all_rows */
+ NULL, /* get_row_count */
+ 1000, /* records */
+ sizeof(pos_connect_attr_by_thread_by_attr), /* ref length */
+ &m_table_lock,
+ &m_field_def,
+ false /* checked */
+};
+
+PFS_engine_table* table_session_connect_attrs::create()
+{
+ return new table_session_connect_attrs();
+}
+
+table_session_connect_attrs::table_session_connect_attrs()
+ : table_session_connect(&m_share)
+{}
diff --git a/storage/perfschema/table_session_connect_attrs.h b/storage/perfschema/table_session_connect_attrs.h
new file mode 100644
index 00000000000..b10b106ba0d
--- /dev/null
+++ b/storage/perfschema/table_session_connect_attrs.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_CONNECT_ATTRS_H
+#define TABLE_SESSION_CONNECT_ATTRS_H
+
+#include "table_session_connect.h"
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+/** Table PERFORMANCE_SCHEMA.SESSION_CONNECT_ATTRS. */
+class table_session_connect_attrs : public table_session_connect
+{
+public:
+ /** Table share */
+ static PFS_engine_table_share m_share;
+ /** Table builder */
+ static PFS_engine_table* create();
+
+protected:
+ table_session_connect_attrs();
+
+public:
+ ~table_session_connect_attrs()
+ {}
+
+private:
+ /** Table share lock. */
+ static THR_LOCK m_table_lock;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_setup_actors.cc b/storage/perfschema/table_setup_actors.cc
index 15d3d9d22a8..91dbb942ead 100644
--- a/storage/perfschema/table_setup_actors.cc
+++ b/storage/perfschema/table_setup_actors.cc
@@ -105,6 +105,9 @@ int table_setup_actors::write_row(TABLE *table, unsigned char *buf,
}
}
+ if (user->length() == 0 || host->length() == 0 || role->length() == 0)
+ return HA_ERR_WRONG_COMMAND;
+
return insert_setup_actor(user, host, role);
}
@@ -264,39 +267,13 @@ int table_setup_actors::delete_row_values(TABLE *table,
const unsigned char *buf,
Field **fields)
{
- Field *f;
- String user_data("", 0, &my_charset_utf8_bin);
- String host_data("", 0, &my_charset_utf8_bin);
- String role_data("", 0, &my_charset_utf8_bin);
- String *user= NULL;
- String *host= NULL;
- String *role= NULL;
-
- for (; (f= *fields) ; fields++)
- {
- if (bitmap_is_set(table->read_set, f->field_index))
- {
- switch(f->field_index)
- {
- case 0: /* HOST */
- host= get_field_char_utf8(f, &host_data);
- break;
- case 1: /* USER */
- user= get_field_char_utf8(f, &user_data);
- break;
- case 2: /* ROLE */
- role= get_field_char_utf8(f, &role_data);
- break;
- default:
- DBUG_ASSERT(false);
- }
- }
- }
+ DBUG_ASSERT(m_row_exists);
- DBUG_ASSERT(user != NULL);
- DBUG_ASSERT(host != NULL);
- DBUG_ASSERT(role != NULL);
+ CHARSET_INFO *cs= &my_charset_utf8_bin;
+ String user(m_row.m_username, m_row.m_username_length, cs);
+ String role(m_row.m_rolename, m_row.m_rolename_length, cs);
+ String host(m_row.m_hostname, m_row.m_hostname_length, cs);
- return delete_setup_actor(user, host, role);
+ return delete_setup_actor(&user, &host, &role);
}
diff --git a/storage/perfschema/table_setup_objects.cc b/storage/perfschema/table_setup_objects.cc
index 33e360e989b..11fab913ac4 100644
--- a/storage/perfschema/table_setup_objects.cc
+++ b/storage/perfschema/table_setup_objects.cc
@@ -339,42 +339,15 @@ int table_setup_objects::delete_row_values(TABLE *table,
const unsigned char *buf,
Field **fields)
{
- int result;
- Field *f;
- enum_object_type object_type= OBJECT_TYPE_TABLE;
- String object_schema_data("", 0, &my_charset_utf8_bin);
- String object_name_data("", 0, &my_charset_utf8_bin);
- String *object_schema= NULL;
- String *object_name= NULL;
+ DBUG_ASSERT(m_row_exists);
- for (; (f= *fields) ; fields++)
- {
- if (bitmap_is_set(table->read_set, f->field_index))
- {
- switch(f->field_index)
- {
- case 0: /* OBJECT_TYPE */
- object_type= (enum_object_type) get_field_enum(f);
- break;
- case 1: /* OBJECT_SCHEMA */
- object_schema= get_field_varchar_utf8(f, &object_schema_data);
- break;
- case 2: /* OBJECT_NAME */
- object_name= get_field_varchar_utf8(f, &object_name_data);
- break;
- case 3: /* ENABLED */
- case 4: /* TIMED */
- break;
- default:
- DBUG_ASSERT(false);
- }
- }
- }
+ CHARSET_INFO *cs= &my_charset_utf8_bin;
+ enum_object_type object_type= OBJECT_TYPE_TABLE;
+ String object_schema(m_row.m_schema_name, m_row.m_schema_name_length, cs);
+ String object_name(m_row.m_object_name, m_row.m_object_name_length, cs);
- DBUG_ASSERT(object_schema != NULL);
- DBUG_ASSERT(object_name != NULL);
+ int result= delete_setup_object(object_type, &object_schema, &object_name);
- result= delete_setup_object(object_type, object_schema, object_name);
if (result == 0)
result= update_derived_flags();
return result;
diff --git a/storage/perfschema/table_socket_instances.cc b/storage/perfschema/table_socket_instances.cc
index f913c8fcc65..0fa1d2b1a3a 100644
--- a/storage/perfschema/table_socket_instances.cc
+++ b/storage/perfschema/table_socket_instances.cc
@@ -42,7 +42,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -205,7 +205,7 @@ int table_socket_instances::read_row_values(TABLE *table,
break;
case 2: /* THREAD_ID */
if (m_row.m_thread_id_set)
- set_field_ulong(f, m_row.m_thread_id);
+ set_field_ulonglong(f, m_row.m_thread_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_socket_instances.h b/storage/perfschema/table_socket_instances.h
index 2a80aeaa76a..080f11c1ba8 100644
--- a/storage/perfschema/table_socket_instances.h
+++ b/storage/perfschema/table_socket_instances.h
@@ -39,7 +39,7 @@ struct row_socket_instances
/** Column OBJECT_INSTANCE_BEGIN */
const void *m_identity;
/** Column THREAD_ID */
- uint m_thread_id;
+ ulonglong m_thread_id;
/** True if thread_is is set */
bool m_thread_id_set;
/** Column SOCKET_ID */
diff --git a/storage/perfschema/table_sync_instances.cc b/storage/perfschema/table_sync_instances.cc
index 9631c5fb205..4d7c48efdc1 100644
--- a/storage/perfschema/table_sync_instances.cc
+++ b/storage/perfschema/table_sync_instances.cc
@@ -43,7 +43,7 @@ static const TABLE_FIELD_TYPE mutex_field_types[]=
},
{
{ C_STRING_WITH_LEN("LOCKED_BY_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
}
};
@@ -178,7 +178,7 @@ int table_mutex_instances::read_row_values(TABLE *table,
break;
case 2: /* LOCKED_BY_THREAD_ID */
if (m_row.m_locked)
- set_field_ulong(f, m_row.m_locked_by_thread_id);
+ set_field_ulonglong(f, m_row.m_locked_by_thread_id);
else
f->set_null();
break;
@@ -207,7 +207,7 @@ static const TABLE_FIELD_TYPE rwlock_field_types[]=
},
{
{ C_STRING_WITH_LEN("WRITE_LOCKED_BY_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -351,7 +351,7 @@ int table_rwlock_instances::read_row_values(TABLE *table,
break;
case 2: /* WRITE_LOCKED_BY_THREAD_ID */
if (m_row.m_write_locked)
- set_field_ulong(f, m_row.m_write_locked_by_thread_id);
+ set_field_ulonglong(f, m_row.m_write_locked_by_thread_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_sync_instances.h b/storage/perfschema/table_sync_instances.h
index b6fc78e1cd5..ff7b2765a11 100644
--- a/storage/perfschema/table_sync_instances.h
+++ b/storage/perfschema/table_sync_instances.h
@@ -45,7 +45,7 @@ struct row_mutex_instances
/** True if column LOCKED_BY_THREAD_ID is not null. */
bool m_locked;
/** Column LOCKED_BY_THREAD_ID. */
- ulong m_locked_by_thread_id;
+ ulonglong m_locked_by_thread_id;
};
/** Table PERFORMANCE_SCHEMA.MUTEX_INSTANCES. */
@@ -102,7 +102,7 @@ struct row_rwlock_instances
/** True if column WRITE_LOCKED_BY_THREAD_ID is not null. */
bool m_write_locked;
/** Column WRITE_LOCKED_BY_THREAD_ID. */
- ulong m_write_locked_by_thread_id;
+ ulonglong m_write_locked_by_thread_id;
/** Column READ_LOCKED_BY_COUNT. */
ulong m_readers;
};
diff --git a/storage/perfschema/table_threads.cc b/storage/perfschema/table_threads.cc
index 91300d6b67e..b1ec2ad754e 100644
--- a/storage/perfschema/table_threads.cc
+++ b/storage/perfschema/table_threads.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -41,7 +41,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("PROCESSLIST_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -81,7 +81,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("PARENT_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -129,6 +129,7 @@ table_threads::table_threads()
void table_threads::make_row(PFS_thread *pfs)
{
pfs_lock lock;
+ pfs_lock processlist_lock;
PFS_thread_class *safe_class;
m_row_exists= false;
@@ -142,7 +143,7 @@ void table_threads::make_row(PFS_thread *pfs)
m_row.m_thread_internal_id= pfs->m_thread_internal_id;
m_row.m_parent_thread_internal_id= pfs->m_parent_thread_internal_id;
- m_row.m_thread_id= pfs->m_thread_id;
+ m_row.m_processlist_id= pfs->m_processlist_id;
m_row.m_name= safe_class->m_name;
m_row.m_name_length= safe_class->m_name_length;
@@ -166,12 +167,30 @@ void table_threads::make_row(PFS_thread *pfs)
m_row.m_command= pfs->m_command;
m_row.m_start_time= pfs->m_start_time;
+
+ /* Protect this reader against attribute changes. */
+ pfs->m_processlist_lock.begin_optimistic_lock(&processlist_lock);
+
/* FIXME: need to copy it ? */
m_row.m_processlist_state_ptr= pfs->m_processlist_state_ptr;
m_row.m_processlist_state_length= pfs->m_processlist_state_length;
/* FIXME: need to copy it ? */
m_row.m_processlist_info_ptr= pfs->m_processlist_info_ptr;
m_row.m_processlist_info_length= pfs->m_processlist_info_length;
+
+ if (! pfs->m_processlist_lock.end_optimistic_lock(& processlist_lock))
+ {
+ /*
+ Columns PROCESSLIST_STATE or PROCESSLIST_INFO are being
+ updated while we read them, and are unsafe to use.
+ Do not discard the entire row.
+ Do not loop waiting for a stable value.
+ Just return NULL values for these columns.
+ */
+ m_row.m_processlist_state_length= 0;
+ m_row.m_processlist_info_length= 0;
+ }
+
m_row.m_enabled_ptr= &pfs->m_enabled;
if (pfs->m_lock.end_optimistic_lock(& lock))
@@ -200,20 +219,20 @@ int table_threads::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* NAME */
set_field_varchar_utf8(f, m_row.m_name, m_row.m_name_length);
break;
case 2: /* TYPE */
- if (m_row.m_thread_id != 0)
+ if (m_row.m_processlist_id != 0)
set_field_varchar_utf8(f, "FOREGROUND", 10);
else
set_field_varchar_utf8(f, "BACKGROUND", 10);
break;
case 3: /* PROCESSLIST_ID */
- if (m_row.m_thread_id != 0)
- set_field_ulong(f, m_row.m_thread_id);
+ if (m_row.m_processlist_id != 0)
+ set_field_ulonglong(f, m_row.m_processlist_id);
else
f->set_null();
break;
@@ -239,7 +258,7 @@ int table_threads::read_row_values(TABLE *table,
f->set_null();
break;
case 7: /* PROCESSLIST_COMMAND */
- if (m_row.m_thread_id != 0)
+ if (m_row.m_processlist_id != 0)
set_field_varchar_utf8(f, command_name[m_row.m_command].str,
command_name[m_row.m_command].length);
else
@@ -271,7 +290,7 @@ int table_threads::read_row_values(TABLE *table,
break;
case 11: /* PARENT_THREAD_ID */
if (m_row.m_parent_thread_internal_id != 0)
- set_field_ulong(f, m_row.m_parent_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_parent_thread_internal_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_threads.h b/storage/perfschema/table_threads.h
index 9819822f8c8..bce45c0cbce 100644
--- a/storage/perfschema/table_threads.h
+++ b/storage/perfschema/table_threads.h
@@ -32,9 +32,9 @@ struct PFS_thread;
struct row_threads
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column PROCESSLIST_ID. */
- ulong m_thread_id;
+ ulonglong m_processlist_id;
/** Column NAME. */
const char* m_name;
/** Length in bytes of @c m_name. */
@@ -66,7 +66,7 @@ struct row_threads
/** Column INSTRUMENTED. */
bool *m_enabled_ptr;
/** Column PARENT_THREAD_ID. */
- ulong m_parent_thread_internal_id;
+ ulonglong m_parent_thread_internal_id;
};
/** Table PERFORMANCE_SCHEMA.THREADS. */
diff --git a/storage/perfschema/table_tiws_by_index_usage.cc b/storage/perfschema/table_tiws_by_index_usage.cc
index d354c40d3ed..71455793516 100644
--- a/storage/perfschema/table_tiws_by_index_usage.cc
+++ b/storage/perfschema/table_tiws_by_index_usage.cc
@@ -290,15 +290,16 @@ int table_tiws_by_index_usage::rnd_next(void)
table_share= &table_share_array[m_pos.m_index_1];
if (table_share->m_lock.is_populated())
{
- if (m_pos.m_index_2 < table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ if (m_pos.m_index_2 < safe_key_count)
{
make_row(table_share, m_pos.m_index_2);
m_next_pos.set_after(&m_pos);
return 0;
}
- if (m_pos.m_index_2 <= MAX_KEY)
+ if (m_pos.m_index_2 <= MAX_INDEXES)
{
- m_pos.m_index_2= MAX_KEY;
+ m_pos.m_index_2= MAX_INDEXES;
make_row(table_share, m_pos.m_index_2);
m_next_pos.set_after(&m_pos);
return 0;
@@ -319,12 +320,13 @@ table_tiws_by_index_usage::rnd_pos(const void *pos)
table_share= &table_share_array[m_pos.m_index_1];
if (table_share->m_lock.is_populated())
{
- if (m_pos.m_index_2 < table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ if (m_pos.m_index_2 < safe_key_count)
{
make_row(table_share, m_pos.m_index_2);
return 0;
}
- if (m_pos.m_index_2 == MAX_KEY)
+ if (m_pos.m_index_2 == MAX_INDEXES)
{
make_row(table_share, m_pos.m_index_2);
return 0;
diff --git a/storage/perfschema/unittest/CMakeLists.txt b/storage/perfschema/unittest/CMakeLists.txt
index 757bc24c566..c3a7fe5c72f 100644
--- a/storage/perfschema/unittest/CMakeLists.txt
+++ b/storage/perfschema/unittest/CMakeLists.txt
@@ -1,5 +1,4 @@
-# Copyright (c) 2009, 2010 Sun Microsystems, Inc.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -11,18 +10,44 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/include/mysql
${CMAKE_SOURCE_DIR}/regex
${CMAKE_SOURCE_DIR}/sql
- ${CMAKE_SOURCE_DIR}/extra/yassl/include
+ ${SSL_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/unittest/mytap
${CMAKE_SOURCE_DIR}/storage/perfschema)
-ADD_DEFINITIONS(-DMYSQL_SERVER)
+ADD_DEFINITIONS(-DMYSQL_SERVER ${SSL_DEFINES})
MY_ADD_TESTS(pfs_instr_class pfs_instr_class-oom pfs_instr pfs_instr-oom pfs_account-oom pfs_host-oom pfs_user-oom pfs
EXT "cc" LINK_LIBRARIES perfschema mysys)
+
+IF(WIN32)
+ SET(MYSQLD_EXTRA_SOURCES ${CMAKE_SOURCE_DIR}/sql/nt_servc.cc)
+ENDIF()
+
+# We need the server libs to test the blob parser.
+# Add sql_builtin.cc here, to force linkage of plugins below.
+# Also add mysys/string.c (see Bug#45488)
+ADD_EXECUTABLE(pfs_connect_attr-t
+ pfs_connect_attr-t.cc
+ ${CMAKE_BINARY_DIR}/sql/sql_builtin.cc
+ ${CMAKE_SOURCE_DIR}/mysys/string.c
+ ${MYSQLD_EXTRA_SOURCES}
+)
+ADD_DEPENDENCIES(pfs_connect_attr-t GenServerSource)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t mytap perfschema)
+# We need to explicitly link in everything referenced in sql/sql_builtin.cc
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t ${MYSQLD_STATIC_PLUGIN_LIBS})
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t sql binlog rpl master slave sql)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t mysys mysys_ssl)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t vio ${SSL_LIBRARIES})
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t strings dbug regex mysys zlib)
+ADD_TEST(pfs_connect_attr pfs_connect_attr-t)
+
+# On windows, pfs_connect_attr-t may depend on openssl dlls.
+COPY_OPENSSL_DLLS(copy_openssl_pfs_unittest)
diff --git a/storage/perfschema/unittest/pfs-t.cc b/storage/perfschema/unittest/pfs-t.cc
index 6b30c0cc498..31f68195f1b 100644
--- a/storage/perfschema/unittest/pfs-t.cc
+++ b/storage/perfschema/unittest/pfs-t.cc
@@ -111,6 +111,7 @@ void test_bootstrap()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
boot= initialize_performance_schema(& param);
ok(boot != NULL, "boot");
@@ -168,6 +169,7 @@ PSI * load_perfschema()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* test_bootstrap() covered this, assuming it just works */
boot= initialize_performance_schema(& param);
@@ -759,21 +761,21 @@ void test_init_disabled()
/* disabled S-A + disabled T-1: no instrumentation */
socket_class_A->m_enabled= false;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket_A1 not instrumented");
/* enabled S-A + disabled T-1: instrumentation (for later) */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "socket_A1 instrumented");
/* broken key + disabled T-1: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket key 0 not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "broken socket key not instrumented");
/* Pretend thread T-1 is enabled */
@@ -892,16 +894,16 @@ void test_init_disabled()
/* enabled S-A + enabled T-1: instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
psi->destroy_socket(socket_A1);
/* broken key + enabled T-1: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "not instrumented");
/* Pretend the running thread is not instrumented */
@@ -996,21 +998,21 @@ void test_init_disabled()
/* disabled S-A + unknown thread: no instrumentation */
socket_class_A->m_enabled= false;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket_A1 not instrumented");
/* enabled S-A + unknown thread: instrumentation (for later) */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "socket_A1 instrumented");
/* broken key + unknown thread: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket key 0 not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "broken socket key not instrumented");
shutdown_performance_schema();
@@ -1126,7 +1128,7 @@ void test_locker_disabled()
ok(file_A1 != NULL, "instrumented");
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
/* Socket lockers require a thread owner */
@@ -1294,10 +1296,10 @@ void test_locker_disabled()
cond_locker= psi->start_cond_wait(&cond_state, cond_A1, mutex_A1, PSI_COND_WAIT, __FILE__, __LINE__);
ok(cond_locker != NULL, "locker");
psi->end_cond_wait(cond_locker, 0);
- file_locker= psi->get_thread_file_name_locker(&file_state, file_key_A, PSI_FILE_OPEN, "xxx", NULL);
+ file_locker= psi->get_thread_file_name_locker(&file_state, file_key_A, PSI_FILE_STREAM_OPEN, "xxx", NULL);
ok(file_locker != NULL, "locker");
psi->start_file_open_wait(file_locker, __FILE__, __LINE__);
- psi->end_file_open_wait(file_locker);
+ psi->end_file_open_wait(file_locker, NULL);
file_locker= psi->get_thread_file_stream_locker(&file_state, file_A1, PSI_FILE_READ);
ok(file_locker != NULL, "locker");
psi->start_file_wait(file_locker, 10, __FILE__, __LINE__);
@@ -1314,7 +1316,7 @@ void test_locker_disabled()
/* ---------------------------------------------- */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
/* Socket thread owner has not been set */
socket_locker= psi->start_socket_wait(&socket_state, socket_A1, PSI_SOCKET_SEND, 12, "foo.cc", 12);
@@ -1485,6 +1487,8 @@ void test_event_name_index()
memset(& param, 0xFF, sizeof(param));
param.m_enabled= true;
+ /* NOTE: Need to add 3 to each index: table io, table lock, idle */
+
/* Per mutex info waits should be at [0..9] */
param.m_mutex_class_sizing= 10;
/* Per rwlock info waits should be at [10..29] */
@@ -1509,6 +1513,7 @@ void test_event_name_index()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
param.m_mutex_sizing= 0;
param.m_rwlock_sizing= 0;
@@ -1540,10 +1545,10 @@ void test_event_name_index()
psi->register_mutex("X", dummy_mutexes, 2);
mutex_class= find_mutex_class(dummy_mutex_key_1);
ok(mutex_class != NULL, "mutex class 1");
- ok(mutex_class->m_event_name_index == 0, "index 0");
+ ok(mutex_class->m_event_name_index == 3, "index 3");
mutex_class= find_mutex_class(dummy_mutex_key_2);
ok(mutex_class != NULL, "mutex class 2");
- ok(mutex_class->m_event_name_index == 1, "index 1");
+ ok(mutex_class->m_event_name_index == 4, "index 4");
PFS_rwlock_class *rwlock_class;
PSI_rwlock_key dummy_rwlock_key_1;
@@ -1557,10 +1562,10 @@ void test_event_name_index()
psi->register_rwlock("X", dummy_rwlocks, 2);
rwlock_class= find_rwlock_class(dummy_rwlock_key_1);
ok(rwlock_class != NULL, "rwlock class 1");
- ok(rwlock_class->m_event_name_index == 10, "index 10");
+ ok(rwlock_class->m_event_name_index == 13, "index 13");
rwlock_class= find_rwlock_class(dummy_rwlock_key_2);
ok(rwlock_class != NULL, "rwlock class 2");
- ok(rwlock_class->m_event_name_index == 11, "index 11");
+ ok(rwlock_class->m_event_name_index == 14, "index 14");
PFS_cond_class *cond_class;
PSI_cond_key dummy_cond_key_1;
@@ -1574,10 +1579,10 @@ void test_event_name_index()
psi->register_cond("X", dummy_conds, 2);
cond_class= find_cond_class(dummy_cond_key_1);
ok(cond_class != NULL, "cond class 1");
- ok(cond_class->m_event_name_index == 30, "index 30");
+ ok(cond_class->m_event_name_index == 33, "index 33");
cond_class= find_cond_class(dummy_cond_key_2);
ok(cond_class != NULL, "cond class 2");
- ok(cond_class->m_event_name_index == 31, "index 31");
+ ok(cond_class->m_event_name_index == 34, "index 34");
PFS_file_class *file_class;
PSI_file_key dummy_file_key_1;
@@ -1591,10 +1596,10 @@ void test_event_name_index()
psi->register_file("X", dummy_files, 2);
file_class= find_file_class(dummy_file_key_1);
ok(file_class != NULL, "file class 1");
- ok(file_class->m_event_name_index == 70, "index 70");
+ ok(file_class->m_event_name_index == 73, "index 73");
file_class= find_file_class(dummy_file_key_2);
ok(file_class != NULL, "file class 2");
- ok(file_class->m_event_name_index == 71, "index 71");
+ ok(file_class->m_event_name_index == 74, "index 74");
PFS_socket_class *socket_class;
PSI_socket_key dummy_socket_key_1;
@@ -1608,13 +1613,13 @@ void test_event_name_index()
psi->register_socket("X", dummy_sockets, 2);
socket_class= find_socket_class(dummy_socket_key_1);
ok(socket_class != NULL, "socket class 1");
- ok(socket_class->m_event_name_index == 150, "index 150");
+ ok(socket_class->m_event_name_index == 153, "index 153");
socket_class= find_socket_class(dummy_socket_key_2);
ok(socket_class != NULL, "socket class 2");
- ok(socket_class->m_event_name_index == 151, "index 151");
+ ok(socket_class->m_event_name_index == 154, "index 154");
- ok(global_table_io_class.m_event_name_index == 310, "index 310");
- ok(global_table_lock_class.m_event_name_index == 311, "index 311");
+ ok(global_table_io_class.m_event_name_index == 0, "index 0");
+ ok(global_table_lock_class.m_event_name_index == 1, "index 1");
ok(wait_class_max= 313, "313 event names"); // 3 global classes
}
diff --git a/storage/perfschema/unittest/pfs_account-oom-t.cc b/storage/perfschema/unittest/pfs_account-oom-t.cc
index d0c139476b0..0e48ab68ef7 100644
--- a/storage/perfschema/unittest/pfs_account-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_account-oom-t.cc
@@ -59,6 +59,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* Setup */
diff --git a/storage/perfschema/unittest/pfs_connect_attr-t.cc b/storage/perfschema/unittest/pfs_connect_attr-t.cc
new file mode 100644
index 00000000000..7bee1d063a1
--- /dev/null
+++ b/storage/perfschema/unittest/pfs_connect_attr-t.cc
@@ -0,0 +1,345 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include <my_global.h>
+#include <my_pthread.h>
+#include <pfs_server.h>
+#include <pfs_instr_class.h>
+#include <pfs_instr.h>
+#include <pfs_global.h>
+#include <tap.h>
+
+
+#include <string.h>
+#include <memory.h>
+
+/* test helpers, to inspect data */
+bool read_nth_attr(const char *connect_attrs, uint connect_attrs_length,
+ const CHARSET_INFO *connect_attrs_cs,
+ uint ordinal,
+ char *attr_name, uint max_attr_name,
+ uint *attr_name_length,
+ char *attr_value, uint max_attr_value,
+ uint *attr_value_length);
+
+void test_blob_parser()
+{
+ char name[100], value[4096];
+ unsigned char packet[10000], *ptr;
+ uint name_len, value_len, idx, packet_length;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ diag("test_blob_parser");
+
+ result= read_nth_attr("", 0, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "zero length blob");
+
+
+ result= read_nth_attr("\x1", 1, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid key length");
+
+
+ result= read_nth_attr("\x2k1\x1", 4, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid value length");
+
+
+ result= read_nth_attr("\x2k1\x2v1", 6, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "one pair return");
+ ok(name_len == 2, "one pair attr name length");
+ ok(!strncmp(name, "k1", name_len), "one pair attr name");
+ ok(value_len == 2, "one pair value length");
+ ok(!strncmp(value, "v1", value_len), "one pair value");
+
+ result= read_nth_attr("\x2k1\x2v1", 6, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "no second arg");
+
+ result= read_nth_attr("\x2k1\x2v1\x2k2\x2v2", 12, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "two pairs return");
+ ok(name_len == 2, "two pairs attr name length");
+ ok(!strncmp(name, "k2", name_len), "two pairs attr name");
+ ok(value_len == 2, "two pairs value length");
+ ok(!strncmp(value, "v2", value_len), "two pairs value");
+
+ result= read_nth_attr("\x2k1\xff\x2k2\x2v2", 12, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "two pairs first value bad return");
+
+ result= read_nth_attr("\x2k1\x2v1\x2k2\x2v2", 10, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "two pairs wrong global length");
+
+ result= read_nth_attr("\x21z123456789z123456789z123456789z12\x2v1", 37, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "attr name overflow");
+ ok(name_len == 32, "attr name overflow length");
+ ok(!strncmp(name, "z123456789z123456789z123456789z1", name_len),
+ "attr name overflow name");
+ ok(value_len == 2, "attr name overflow value length");
+ ok(!strncmp(value, "v1", value_len), "attr name overflow value");
+
+ packet[0]= 2;
+ packet[1]= 'k';
+ packet[2]= '1';
+ ptr= net_store_length(packet + 3, 1025);
+ for (idx= 0; idx < 1025; idx++)
+ *ptr++= '0' + (idx % 10);
+ packet_length= (uint) (ptr - packet);
+ result= read_nth_attr((char *) packet, packet_length, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "attr value overflow");
+ ok(name_len == 2, "attr value overflow length");
+ ok(!strncmp(name, "k1", name_len), "attr value overflow name");
+ ok(value_len == 1024, "attr value overflow value length");
+ for (idx= 0; idx < 1024; idx++)
+ {
+ if (value[idx] != (char) ('0' + (idx % 10)))
+ break;
+ }
+ ok (idx == 1024, "attr value overflow value");
+
+ result= read_nth_attr("\x21z123456789z123456789z123456789z12\x2v1\x2k2\x2v2",
+ 43, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "prev attr name overflow");
+ ok(name_len == 2, "prev attr name overflow length");
+ ok(!strncmp(name, "k2", name_len),
+ "prev attr name overflow name");
+ ok(value_len == 2, "prev attr name overflow value length");
+ ok(!strncmp(value, "v2", value_len), "prev attr name overflow value");
+
+
+ packet[1]= 'k';
+ packet[2]= '1';
+ packet[3]= 2;
+ packet[4]= 'v';
+ packet[5]= '1';
+
+ for(idx= 251; idx < 256; idx++)
+ {
+ packet[0]= idx;
+ result= read_nth_attr((char *) packet, 6, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid string length %d", idx);
+ }
+
+ memset(packet, 0, sizeof(packet));
+ for (idx=0; idx < 1660 /* *6 = 9960 */; idx++)
+ memcpy(packet + idx * 6, "\x2k1\x2v1", 6);
+ result= read_nth_attr((char *) packet, 8192, cs, 1364,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "last valid attribute %d", 1364);
+ result= read_nth_attr((char *) packet, 8192, cs, 1365,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "first attribute that's cut %d", 1365);
+}
+
+void test_multibyte_lengths()
+{
+ char name[100], value[4096];
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ unsigned char var_len_packet[] = {
+ 252, 2, 0, 'k', '1',
+ 253, 2, 0, 0, 'v', '1',
+ 254, 2, 0, 0, 0, 0, 0, 0, 0, 'k', '2',
+ 254, 2, 0, 0, 0, 0, 0, 0, 0, 'v', '2'
+ };
+
+ result= read_nth_attr((char *) var_len_packet, sizeof(var_len_packet), cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "multibyte lengths return");
+ ok(name_len == 2, "multibyte lengths name length");
+ ok(!strncmp(name, "k1", name_len), "multibyte lengths attr name");
+ ok(value_len == 2, "multibyte lengths value length");
+ ok(!strncmp(value, "v1", value_len), "multibyte lengths value");
+
+ result= read_nth_attr((char *) var_len_packet, sizeof(var_len_packet), cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "multibyte lengths second attr return");
+ ok(name_len == 2, "multibyte lengths second attr name length");
+ ok(!strncmp(name, "k2", name_len), "multibyte lengths second attr attr name");
+ ok(value_len == 2, "multibyte lengths value length");
+ ok(!strncmp(value, "v2", value_len), "multibyte lengths second attr value");
+}
+
+
+void test_utf8_parser()
+{
+ /* utf8 max byte length per character is 6 */
+ char name[33 * 6], value[1024 * 6], packet[1500 * 6], *ptr;
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ /* note : this is encoded in utf-8 */
+ const char *attr1= "Георги";
+ const char *val1= "Кодинов";
+ const char *attr2= "Пловдив";
+ const char *val2= "България";
+
+ ptr= packet;
+ *ptr++= strlen(attr1);
+ memcpy(ptr, attr1, strlen(attr1));
+ ptr+= strlen(attr1);
+ *ptr++= strlen(val1);
+ memcpy(ptr, val1, strlen(val1));
+ ptr+= strlen(val1);
+
+ *ptr++= strlen(attr2);
+ memcpy(ptr, attr2, strlen(attr2));
+ ptr+= strlen(attr2);
+ *ptr++= strlen(val2);
+ memcpy(ptr, val2, strlen(val2));
+ ptr+= strlen(val2);
+
+ diag("test_utf8_parser attr pair #1");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ ok(name_len == strlen(attr1), "name length");
+ ok(!strncmp(name, attr1, name_len), "attr name");
+ ok(value_len == strlen(val1), "value length");
+ ok(!strncmp(value, val1, value_len), "value");
+
+ diag("test_utf8_parser attr pair #2");
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 1,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ ok(name_len == strlen(attr2), "name length");
+ ok(!strncmp(name, attr2, name_len), "attr name");
+ ok(value_len == strlen(val2), "value length");
+ ok(!strncmp(value, val2, value_len), "value");
+}
+
+
+void test_utf8_parser_bad_encoding()
+{
+ /* utf8 max byte length per character is 3*/
+ char name[33 * 3], value[1024 * 3], packet[1500 * 3], *ptr;
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ /* note : this is encoded in utf-8 */
+ const char *attr= "Георги";
+ const char *val= "Кодинов";
+
+ ptr= packet;
+ *ptr++= strlen(attr);
+ memcpy(ptr, attr, strlen(attr));
+ ptr[0]= 0xFA; // invalid UTF-8 char
+ ptr+= strlen(attr);
+ *ptr++= strlen(val);
+ memcpy(ptr, val, strlen(val));
+ ptr+= strlen(val);
+
+ diag("test_utf8_parser_bad_encoding");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == false, "return");
+}
+
+const CHARSET_INFO *cs_cp1251;
+
+void test_cp1251_parser()
+{
+ /* utf8 max byte length per character is 3*/
+ char name[33 * 3], value[1024 * 3], packet[1500 * 3], *ptr;
+ uint name_len, value_len;
+ bool result;
+
+ /* note : this is Георги in windows-1251 */
+ const char *attr1= "\xc3\xe5\xee\xf0\xe3\xe8";
+ /* note : this is Кодинов in windows-1251 */
+ const char *val1= "\xca\xee\xe4\xe8\xed\xee\xe2";
+ /* note : this is Пловдив in windows-1251 */
+ const char *attr2= "\xcf\xeb\xee\xe2\xe4\xe8\xe2";
+ /* note : this is България in windows-1251 */
+ const char *val2= "\xc1\xfa\xeb\xe3\xe0\xf0\xe8\xff";
+
+ ptr= packet;
+ *ptr++= strlen(attr1);
+ memcpy(ptr, attr1, strlen(attr1));
+ ptr+= strlen(attr1);
+ *ptr++= strlen(val1);
+ memcpy(ptr, val1, strlen(val1));
+ ptr+= strlen(val1);
+
+ *ptr++= strlen(attr2);
+ memcpy(ptr, attr2, strlen(attr2));
+ ptr+= strlen(attr2);
+ *ptr++= strlen(val2);
+ memcpy(ptr, val2, strlen(val2));
+ ptr+= strlen(val2);
+
+ diag("test_cp1251_parser attr pair #1");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs_cp1251, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ /* need to compare to the UTF-8 equivalents */
+ ok(name_len == strlen("Георги"), "name length");
+ ok(!strncmp(name, "Георги", name_len), "attr name");
+ ok(value_len == strlen("Кодинов"), "value length");
+ ok(!strncmp(value, "Кодинов", value_len), "value");
+
+ diag("test_cp1251_parser attr pair #2");
+ result= read_nth_attr((char *) packet, ptr - packet, cs_cp1251, 1,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ /* need to compare to the UTF-8 equivalents */
+ ok(name_len == strlen("Пловдив"), "name length");
+ ok(!strncmp(name, "Пловдив", name_len), "attr name");
+ ok(value_len == strlen("България"), "value length");
+ ok(!strncmp(value, "България", value_len), "value");
+}
+
+
+void do_all_tests()
+{
+ test_blob_parser();
+ test_multibyte_lengths();
+ test_utf8_parser();
+ test_utf8_parser_bad_encoding();
+ test_cp1251_parser();
+}
+
+int main(int, char **)
+{
+ MY_INIT("pfs_connect_attr-t");
+
+ cs_cp1251= get_charset_by_csname("cp1251", MY_CS_PRIMARY, MYF(0));
+ if (!cs_cp1251)
+ diag("skipping the cp1251 tests : missing character set");
+ plan(59 + (cs_cp1251 ? 10 : 0));
+ do_all_tests();
+ return 0;
+}
diff --git a/storage/perfschema/unittest/pfs_host-oom-t.cc b/storage/perfschema/unittest/pfs_host-oom-t.cc
index a4fb36b0b08..cc445620496 100644
--- a/storage/perfschema/unittest/pfs_host-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_host-oom-t.cc
@@ -59,6 +59,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* Setup */
diff --git a/storage/perfschema/unittest/pfs_instr-oom-t.cc b/storage/perfschema/unittest/pfs_instr-oom-t.cc
index 41bb4ed6c5a..5d9873d7927 100644
--- a/storage/perfschema/unittest/pfs_instr-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_instr-oom-t.cc
@@ -63,6 +63,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -98,6 +99,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -133,6 +135,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -168,6 +171,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -201,6 +205,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -236,6 +241,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -271,6 +277,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -308,6 +315,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -341,6 +349,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -383,8 +392,9 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
- stub_alloc_fails_after_count= 2;
+ stub_alloc_fails_after_count= 1;
init_event_name_sizing(& param);
rc= init_instruments(& param);
ok(rc == 1, "oom (per thread waits)");
@@ -417,6 +427,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
@@ -451,6 +462,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -485,6 +497,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 10;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -519,6 +532,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -553,6 +567,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 1;
init_event_name_sizing(& param);
@@ -587,6 +602,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
@@ -624,6 +640,7 @@ void test_oom()
param.m_statement_class_sizing= 20;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
diff --git a/storage/perfschema/unittest/pfs_instr-t.cc b/storage/perfschema/unittest/pfs_instr-t.cc
index b0839de70b2..4ef240ea819 100644
--- a/storage/perfschema/unittest/pfs_instr-t.cc
+++ b/storage/perfschema/unittest/pfs_instr-t.cc
@@ -60,6 +60,8 @@ void test_no_instruments()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -117,6 +119,8 @@ void test_no_instances()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -153,19 +157,19 @@ void test_no_instances()
PFS_thread fake_thread;
fake_thread.m_filename_hash_pins= NULL;
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 1, "lost 1");
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 2, "lost 2");
init_file_hash();
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 3, "lost 3");
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 4, "lost 4");
@@ -173,7 +177,7 @@ void test_no_instances()
int size= sizeof(long_file_name);
memset(long_file_name, 'X', size);
- file= find_or_create_file(& fake_thread, & dummy_file_class, long_file_name, size);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, long_file_name, size, true);
ok(file == NULL, "no file");
ok(file_lost == 5, "lost 5");
@@ -184,10 +188,10 @@ void test_no_instances()
ok(table == NULL, "no table");
ok(table_lost == 2, "lost 2");
- socket= create_socket(& dummy_socket_class, NULL);
+ socket= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket == NULL, "no socket");
ok(socket_lost == 1, "lost 1");
- socket= create_socket(& dummy_socket_class, NULL);
+ socket= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket == NULL, "no socket");
ok(socket_lost == 2, "lost 2");
@@ -255,6 +259,8 @@ void test_with_instances()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -325,50 +331,50 @@ void test_with_instances()
PFS_thread fake_thread;
fake_thread.m_filename_hash_pins= NULL;
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file_1 == NULL, "no file");
ok(file_lost == 1, "lost 1");
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file_1 == NULL, "no file");
ok(file_lost == 2, "lost 2");
init_file_hash();
file_lost= 0;
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7, true);
ok(file_1 != NULL, "file");
ok(file_1->m_file_stat.m_open_count == 1, "open count 1");
ok(file_lost == 0, "not lost");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7, true);
ok(file_1 == file_2, "same file");
ok(file_1->m_file_stat.m_open_count == 2, "open count 2");
ok(file_lost == 0, "not lost");
release_file(file_2);
ok(file_1->m_file_stat.m_open_count == 1, "open count 1");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_B", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_B", 7, true);
ok(file_2 != NULL, "file");
ok(file_lost == 0, "not lost");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_C", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_C", 7, true);
ok(file_2 == NULL, "no file");
ok(file_lost == 1, "lost");
release_file(file_1);
/* the file still exists, not destroyed */
ok(file_1->m_file_stat.m_open_count == 0, "open count 0");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_D", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_D", 7, true);
ok(file_2 == NULL, "no file");
ok(file_lost == 2, "lost");
- socket_1= create_socket(& dummy_socket_class, NULL);
+ socket_1= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_1 != NULL, "socket");
ok(socket_lost == 0, "not lost");
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 != NULL, "socket");
ok(socket_lost == 0, "not lost");
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 == NULL, "no socket");
ok(socket_lost == 1, "lost 1");
destroy_socket(socket_1);
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 != NULL, "socket");
ok(socket_lost == 1, "no new loss");
diff --git a/storage/perfschema/unittest/pfs_instr_class-t.cc b/storage/perfschema/unittest/pfs_instr_class-t.cc
index 9e3efde656e..7b3ffccffcc 100644
--- a/storage/perfschema/unittest/pfs_instr_class-t.cc
+++ b/storage/perfschema/unittest/pfs_instr_class-t.cc
@@ -475,6 +475,7 @@ void test_table_registration()
#endif
}
+#ifdef LATER
void set_wait_stat(PFS_instr_class *klass)
{
PFS_single_stat *stat;
@@ -501,6 +502,7 @@ bool is_empty_stat(PFS_instr_class *klass)
return false;
return true;
}
+#endif
void test_instruments_reset()
{
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 7c7d8c7b2f5..f77e705525c 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -870,7 +870,7 @@ static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
}
@@ -883,7 +883,7 @@ static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index d28d576c661..80ed047ebf2 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -80,7 +80,7 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}
@@ -131,7 +131,7 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}
@@ -175,7 +175,7 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -401,7 +401,7 @@ static size_t my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen,srclen));
+ memcpy(dest, src, MY_MIN(dstlen,srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, 0);
return dstlen;
@@ -414,7 +414,7 @@ size_t my_strnxfrm_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen,srclen));
+ memcpy(dest, src, MY_MIN(dstlen,srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, ' ');
return dstlen;
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 8b37de4a5e7..e21c406d2a9 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -3470,7 +3470,7 @@ int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
}
@@ -3481,7 +3481,7 @@ static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 5879bdf7978..5efc6348516 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -473,7 +473,7 @@ my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
}
@@ -518,7 +518,7 @@ my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -557,7 +557,7 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen, srclen));
+ memcpy(dest, src, MY_MIN(dstlen, srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, ' ');
return dstlen;
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e25c0783abf..4b47996e315 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -160,7 +160,7 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (map[*a++] != map[*b++])
@@ -770,7 +770,7 @@ size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
val= new_val;
}
- len= min(len, (size_t) (e-p));
+ len= MY_MIN(len, (size_t) (e-p));
memcpy(dst, p, len);
return len+sign;
}
@@ -824,7 +824,7 @@ size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
long_val= quo;
}
- len= min(len, (size_t) (e-p));
+ len= MY_MIN(len, (size_t) (e-p));
cnv:
memcpy(dst, p, len);
return len+sign;
@@ -1055,7 +1055,7 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
{
size_t nbytes= (size_t) (end-start);
*error= 0;
- return min(nbytes, nchars);
+ return MY_MIN(nbytes, nchars);
}
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index d97f8e5ff08..d84d43a67bd 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -566,7 +566,7 @@ int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)),
a_length= thai2sortable(a, a_length);
b_length= thai2sortable(b, b_length);
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -623,7 +623,7 @@ size_t my_strnxfrm_tis620(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
size_t dstlen= len;
- len= (size_t) (strmake((char*) dest, (char*) src, min(len, srclen)) -
+ len= (size_t) (strmake((char*) dest, (char*) src, MY_MIN(len, srclen)) -
(char*) dest);
len= thai2sortable(dest, len);
if (dstlen > len)
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index e4a8d7a4067..5feeb661d2a 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7726,7 +7726,7 @@ static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
{
char tail[30];
size_t len= lexem->end - lexem->prev;
- strmake (tail, lexem->prev, (size_t) min(len, sizeof(tail)-1));
+ strmake (tail, lexem->prev, (size_t) MY_MIN(len, sizeof(tail)-1));
errstr[errsize-1]= '\0';
my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
}
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 6ebbae8fb5a..8e9b0ab7a04 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -58,7 +58,7 @@ my_bincmp(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se - s), tlen= (int) (te - t);
- int len= min(slen, tlen);
+ int len= MY_MIN(slen, tlen);
int cmp= memcmp(s, t, len);
return cmp ? cmp : slen - tlen;
}
@@ -2433,7 +2433,7 @@ my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 4)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4)
{
my_wc_t s_wc= my_utf32_get(s);
my_wc_t t_wc= my_utf32_get(t);
@@ -2860,7 +2860,7 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 2)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
(((int) s[0]) << 8) + (int) s[1];
@@ -2937,7 +2937,7 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
*error= 0;
nchars*= 2;
- return min(nbytes, nchars);
+ return MY_MIN(nbytes, nchars);
}
@@ -3012,7 +3012,7 @@ static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 2)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
int s_wc= s[0] * 256 + s[1];
int t_wc= t[0] * 256 + t[1];
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 88bab1fac76..6020f9c962f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2244,7 +2244,7 @@ static inline int bincmp(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se-s), tlen= (int) (te-t);
- int len=min(slen,tlen);
+ int len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : slen-tlen;
}
@@ -4667,7 +4667,7 @@ bincmp_utf8mb4(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se - s), tlen= (int) (te - t);
- int len= min(slen, tlen);
+ int len= MY_MIN(slen, tlen);
int cmp= memcmp(s, t, len);
return cmp ? cmp : slen - tlen;
}
diff --git a/strings/ctype.c b/strings/ctype.c
index 6b6983ada60..adff69ad680 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -428,3 +428,144 @@ my_charset_is_ascii_compatible(CHARSET_INFO *cs)
}
return 1;
}
+
+
+/*
+ Convert a string between two character sets.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+static uint32
+my_convert_internal(char *to, uint32 to_length,
+ const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ int cnvres;
+ my_wc_t wc;
+ const uchar *from_end= (const uchar*) from + from_length;
+ char *to_start= to;
+ uchar *to_end= (uchar*) to + to_length;
+ my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+ uint error_count= 0;
+
+ while (1)
+ {
+ if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+ from+= cnvres;
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ error_count++;
+ from++;
+ wc= '?';
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ error_count++;
+ from+= (-cnvres);
+ wc= '?';
+ }
+ else
+ break; // Not enough characters
+
+outp:
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+ to+= cnvres;
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ error_count++;
+ wc= '?';
+ goto outp;
+ }
+ else
+ break;
+ }
+ *errors= error_count;
+ return (uint32) (to - to_start);
+}
+
+
+/*
+ Convert a string between two character sets.
+ Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+uint32
+my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ uint32 length, length2;
+ /*
+ If any of the character sets is not ASCII compatible,
+ immediately switch to slow mb_wc->wc_mb method.
+ */
+ if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+ return my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs, errors);
+
+ length= length2= MY_MIN(to_length, from_length);
+
+#if defined(__i386__)
+ /*
+ Special loop for i386, it allows to refer to a
+ non-aligned memory block as UINT32, which makes
+ it possible to copy four bytes at once. This
+ gives about 10% performance improvement comparing
+ to byte-by-byte loop.
+ */
+ for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+ {
+ if ((*(uint32*)from) & 0x80808080)
+ break;
+ *((uint32*) to)= *((const uint32*) from);
+ }
+#endif /* __i386__ */
+
+ for (; ; *to++= *from++, length--)
+ {
+ if (!length)
+ {
+ *errors= 0;
+ return length2;
+ }
+ if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+ {
+ uint32 copied_length= length2 - length;
+ to_length-= copied_length;
+ from_length-= copied_length;
+ return copied_length + my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs,
+ errors);
+ }
+ }
+
+ DBUG_ASSERT(FALSE); // Should never get to here
+ return 0; // Make compiler happy
+}
diff --git a/strings/decimal.c b/strings/decimal.c
index f318a234d3f..75d72890557 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -396,7 +396,7 @@ int decimal2string(const decimal_t *from, char *to, int *to_len,
for (; frac>0; frac-=DIG_PER_DEC1)
{
dec1 x=*buf++;
- for (i=min(frac, DIG_PER_DEC1); i; i--)
+ for (i=MY_MIN(frac, DIG_PER_DEC1); i; i--)
{
dec1 y=x/DIG_MASK;
*s1++='0'+(uchar)y;
@@ -419,7 +419,7 @@ int decimal2string(const decimal_t *from, char *to, int *to_len,
for (buf=buf0+ROUND_UP(intg); intg>0; intg-=DIG_PER_DEC1)
{
dec1 x=*--buf;
- for (i=min(intg, DIG_PER_DEC1); i; i--)
+ for (i=MY_MIN(intg, DIG_PER_DEC1); i; i--)
{
dec1 y=x/10;
*--s='0'+(uchar)(x-y*10);
@@ -1511,8 +1511,8 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
if (to != from)
{
- dec1 *p0= buf0+intg0+max(frac1, frac0);
- dec1 *p1= buf1+intg0+max(frac1, frac0);
+ dec1 *p0= buf0+intg0+MY_MAX(frac1, frac0);
+ dec1 *p1= buf1+intg0+MY_MAX(frac1, frac0);
DBUG_ASSERT(p0 - buf0 <= len);
DBUG_ASSERT(p1 - buf1 <= len);
@@ -1523,7 +1523,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
buf0=to->buf;
buf1=to->buf;
to->sign=from->sign;
- to->intg=min(intg0, len)*DIG_PER_DEC1;
+ to->intg=MY_MIN(intg0, len)*DIG_PER_DEC1;
}
if (frac0 > frac1)
@@ -1625,7 +1625,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
scale=frac0*DIG_PER_DEC1;
error=E_DEC_TRUNCATED; /* XXX */
}
- for (buf1=to->buf+intg0+max(frac0,0); buf1 > to->buf; buf1--)
+ for (buf1=to->buf+intg0+MY_MAX(frac0,0); buf1 > to->buf; buf1--)
{
buf1[0]=buf1[-1];
}
@@ -1644,7 +1644,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
/* making 'zero' with the proper scale */
dec1 *p0= to->buf + frac0 + 1;
to->intg=1;
- to->frac= max(scale, 0);
+ to->frac= MY_MAX(scale, 0);
to->sign= 0;
for (buf1= to->buf; buf1<p0; buf1++)
*buf1= 0;
@@ -1693,11 +1693,11 @@ int decimal_result_size(decimal_t *from1, decimal_t *from2, char op, int param)
{
switch (op) {
case '-':
- return ROUND_UP(max(from1->intg, from2->intg)) +
- ROUND_UP(max(from1->frac, from2->frac));
+ return ROUND_UP(MY_MAX(from1->intg, from2->intg)) +
+ ROUND_UP(MY_MAX(from1->frac, from2->frac));
case '+':
- return ROUND_UP(max(from1->intg, from2->intg)+1) +
- ROUND_UP(max(from1->frac, from2->frac));
+ return ROUND_UP(MY_MAX(from1->intg, from2->intg)+1) +
+ ROUND_UP(MY_MAX(from1->frac, from2->frac));
case '*':
return ROUND_UP(from1->intg+from2->intg)+
ROUND_UP(from1->frac)+ROUND_UP(from2->frac);
@@ -1712,7 +1712,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
{
int intg1=ROUND_UP(from1->intg), intg2=ROUND_UP(from2->intg),
frac1=ROUND_UP(from1->frac), frac2=ROUND_UP(from2->frac),
- frac0=max(frac1, frac2), intg0=max(intg1, intg2), error;
+ frac0=MY_MAX(frac1, frac2), intg0=MY_MAX(intg1, intg2), error;
dec1 *buf1, *buf2, *buf0, *stop, *stop2, x, carry;
sanity(to);
@@ -1737,7 +1737,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
buf0=to->buf+intg0+frac0;
to->sign=from1->sign;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
to->intg=intg0*DIG_PER_DEC1;
if (unlikely(error))
{
@@ -1748,7 +1748,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
set_if_smaller(intg2, intg0);
}
- /* part 1 - max(frac) ... min (frac) */
+ /* part 1 - MY_MAX(frac) ... MY_MIN(frac) */
if (frac1 > frac2)
{
buf1=from1->buf+intg1+frac1;
@@ -1766,14 +1766,14 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
while (buf1 > stop)
*--buf0=*--buf1;
- /* part 2 - min(frac) ... min(intg) */
+ /* part 2 - MY_MIN(frac) ... MY_MIN(intg) */
carry=0;
while (buf1 > stop2)
{
ADD(*--buf0, *--buf1, *--buf2, carry);
}
- /* part 3 - min(intg) ... max(intg) */
+ /* part 3 - MY_MIN(intg) ... MY_MAX(intg) */
buf1= intg1 > intg2 ? ((stop=from1->buf)+intg1-intg2) :
((stop=from2->buf)+intg2-intg1) ;
while (buf1 > stop)
@@ -1794,7 +1794,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
{
int intg1=ROUND_UP(from1->intg), intg2=ROUND_UP(from2->intg),
frac1=ROUND_UP(from1->frac), frac2=ROUND_UP(from2->frac);
- int frac0=max(frac1, frac2), error;
+ int frac0=MY_MAX(frac1, frac2), error;
dec1 *buf1, *buf2, *buf0, *stop1, *stop2, *start1, *start2;
my_bool carry=0;
@@ -1870,7 +1870,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
FIX_INTG_FRAC_ERROR(to->len, intg1, frac0, error);
buf0=to->buf+intg1+frac0;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
to->intg=intg1*DIG_PER_DEC1;
if (unlikely(error))
{
@@ -1881,7 +1881,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
}
carry=0;
- /* part 1 - max(frac) ... min (frac) */
+ /* part 1 - MY_MAX(frac) ... MY_MIN(frac) */
if (frac1 > frac2)
{
buf1=start1+intg1+frac1;
@@ -1905,7 +1905,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
}
}
- /* part 2 - min(frac) ... intg2 */
+ /* part 2 - MY_MIN(frac) ... intg2 */
while (buf2 > start2)
{
SUB(*--buf0, *--buf1, *--buf2, carry);
@@ -2168,11 +2168,11 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
{
/* we're calculating N1 % N2.
The result will have
- frac=max(frac1, frac2), as for subtraction
+ frac=MY_MAX(frac1, frac2), as for subtraction
intg=intg2
*/
to->sign=from1->sign;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
frac0=0;
}
else
@@ -2305,7 +2305,7 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
/*
now the result is in tmp1, it has
intg=prec1-frac1
- frac=max(frac1, frac2)=to->frac
+ frac=MY_MAX(frac1, frac2)=to->frac
*/
if (dcarry)
*--start1=dcarry;
@@ -2343,7 +2343,7 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
}
DBUG_ASSERT(intg0 <= ROUND_UP(from2->intg));
stop1=start1+frac0+intg0;
- to->intg=min(intg0*DIG_PER_DEC1, from2->intg);
+ to->intg=MY_MIN(intg0*DIG_PER_DEC1, from2->intg);
}
if (unlikely(intg0+frac0 > to->len))
{
diff --git a/strings/dtoa.c b/strings/dtoa.c
index 6b216056f66..f3498a7bb1e 100644
--- a/strings/dtoa.c
+++ b/strings/dtoa.c
@@ -132,7 +132,7 @@ size_t my_fcvt(double x, int precision, char *to, my_bool *error)
if (len <= decpt)
*dst++= '.';
- for (i= precision - max(0, (len - decpt)); i > 0; i--)
+ for (i= precision - MY_MAX(0, (len - decpt)); i > 0; i--)
*dst++= '0';
}
@@ -221,7 +221,7 @@ size_t my_gcvt(double x, my_gcvt_arg_type type, int width, char *to,
if (x < 0.)
width--;
- res= dtoa(x, 4, type == MY_GCVT_ARG_DOUBLE ? width : min(width, FLT_DIG),
+ res= dtoa(x, 4, type == MY_GCVT_ARG_DOUBLE ? width : MY_MIN(width, FLT_DIG),
&decpt, &sign, &end, buf, sizeof(buf));
if (decpt == DTOA_OVERFLOW)
{
@@ -2182,7 +2182,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
1 ==> like 0, but with Steele & White stopping rule;
e.g. with IEEE P754 arithmetic , mode 0 gives
1e23 whereas mode 1 gives 9.999999999999999e22.
- 2 ==> max(1,ndigits) significant digits. This gives a
+ 2 ==> MY_MAX(1,ndigits) significant digits. This gives a
return value similar to that of ecvt, except
that trailing zeros are suppressed.
3 ==> through ndigits past the decimal point. This
diff --git a/strings/my_vsnprintf.c b/strings/my_vsnprintf.c
index 2073d5a93d9..a05f60decf9 100644
--- a/strings/my_vsnprintf.c
+++ b/strings/my_vsnprintf.c
@@ -96,7 +96,7 @@ static const char *get_length_arg(const char *fmt, ARGS_INFO *args_arr,
uint *arg_count, size_t *length, uint *flags)
{
fmt= get_length(fmt+1, length, flags);
- *arg_count= max(*arg_count, (uint) *length);
+ *arg_count= MY_MAX(*arg_count, (uint) *length);
(*length)--;
DBUG_ASSERT(*fmt == '$' && *length < MAX_ARGS);
args_arr[*length].arg_type= 'd';
@@ -243,7 +243,7 @@ static char *process_dbl_arg(char *to, char *end, size_t width,
width= FLT_DIG; /* width not set, use default */
else if (width >= NOT_FIXED_DEC)
width= NOT_FIXED_DEC - 1; /* max.precision for my_fcvt() */
- width= min(width, (size_t)(end-to) - 1);
+ width= MY_MIN(width, (size_t)(end-to) - 1);
if (arg_type == 'f')
to+= my_fcvt(par, (int)width , to, NULL);
@@ -292,7 +292,7 @@ static char *process_int_arg(char *to, const char *end, size_t length,
/* If %#d syntax was used, we have to pre-zero/pre-space the string */
if (store_start == buff)
{
- length= min(length, to_length);
+ length= MY_MIN(length, to_length);
if (res_length < length)
{
size_t diff= (length- res_length);
@@ -512,7 +512,7 @@ start:
break;
/* Copy data after the % format expression until next % */
- length= min(end - to , print_arr[i].end - print_arr[i].begin);
+ length= MY_MIN(end - to , print_arr[i].end - print_arr[i].begin);
if (to + length < end)
length++;
to= strnmov(to, print_arr[i].begin, length);
@@ -533,7 +533,7 @@ start:
fmt= get_length(fmt, &arg_index, &unused_flags);
DBUG_ASSERT(*fmt == '$');
fmt++;
- arg_count= max(arg_count, arg_index);
+ arg_count= MY_MAX(arg_count, arg_index);
goto start;
}
diff --git a/strings/str2int.c b/strings/str2int.c
index 64d4e169891..ec89503af5e 100644
--- a/strings/str2int.c
+++ b/strings/str2int.c
@@ -94,7 +94,7 @@ char *str2int(register const char *src, register int radix, long int lower,
machines all, if +|n| is representable, so is -|n|, but on
twos complement machines the converse is not true. So the
"maximum" representable number has a negative representative.
- Limit is set to min(-|lower|,-|upper|); this is the "largest"
+ Limit is set to MY_MIN(-|lower|,-|upper|); this is the "largest"
number we are concerned with. */
/* Calculate Limit using Scale as a scratch variable */